Manually set working directory to source file location (Session Tab)

To-Do:

Add R-squared

Add double single support

Add Interaction

Sensitivity analysis/cluster by participant

library(readxl)
## Warning: package 'readxl' was built under R version 4.1.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.3
## Warning: package 'tibble' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'purrr' was built under R version 4.1.3
## Warning: package 'stringr' was built under R version 4.1.3
## Warning: package 'forcats' was built under R version 4.1.3
## Warning: package 'lubridate' was built under R version 4.1.3
## -- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
## v forcats   1.0.0     v readr     2.1.4
## v ggplot2   3.4.4     v stringr   1.5.0
## v lubridate 1.9.2     v tibble    3.2.1
## v purrr     1.0.1     v tidyr     1.3.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## i Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(ggplot2)
library(broom)
## Warning: package 'broom' was built under R version 4.1.3

set output directory

# analysis folder 
analysis_version <- '005'

# video metric folder version 
metrics_version <- '004'
output_dir <- file.path("C:/Users/mmccu/Box/MM_Personal/5_Projects/BoveLab/3_Data_and_Code/gait_bw_zeno_home_analysis",
                    analysis_version, 
                    "003_regression_video_metric_vs_outcomes")

scatter_dir <- file.path(output_dir, "scatterplots")

Functions

Function - box and whisker plots regression estimates

Significance based transparency

univariate_regression_plot <- function(results, plot_title, x_adj) {
  # Apply rounding function to p-value columns 
  results <- results %>% mutate(pval_text = paste0("p=", sapply(p.value, format_p_value)))  # Format p-values
  
  # Apply formatting function and create significance indicators
  results <- results %>%
    mutate(
      pval_text = paste0("p=", sapply(p.value, format_p_value)),
      sig = ifelse(p.value < 0.05, "Significant", "Non-Significant"),  # Create significance group
      alpha_level = ifelse(p.value < 0.05, 1, 0.3)  # More transparency for non-sig points
    )
  results
  
  # Determine x position for p-values (offset to the right of the max estimate)
  x_max <- max(results$conf.high, na.rm = TRUE)  # Get max confidence interval upper bound
  results <- results %>% mutate(pval_x_pos = x_max + x_adj)  # Place p-values slightly to the right of max x range
  
  p <- ggplot(results, aes(x = estimate, y = term, color = Variable, alpha = alpha_level))+ 
    geom_point(size = 3)  + # dot = coefficient estimate 
    geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = 0.2) +  # Whiskers for confidence intervals
    geom_text(aes(x = pval_x_pos, label = pval_text, fontface = ifelse(p.value < 0.05, "bold", "plain")), 
              hjust = 1, size = 3) +  # Right-aligned p-values, bold if significant
    scale_alpha_identity() +  # Use manually set alpha levels
    geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +  # Reference line at zero
    theme_minimal() + 
    labs(title = plot_title,
         x = "Estimate (Effect Size)",
         y = "Predictor") +
    theme(legend.position = "right")  # Hide legend to keep it clean
  
  return(p)
}

Round p-values

# Conditional rounding function
format_p_value <- function(p) {
  if (p < 0.001) {
    return(formatC(p, format = "e", digits = 1))  # Scientific notation, 1 decimal place
  } else {
    return(formatC(p, format = "f", digits = 2))  # Standard notation, 2 decimal places
  }
}

Plot Adjusted vs Unadjusted coefficient estimates

adj_vs_unadj_regression_plot <- function(results_df, plot_title, x_adj){
  
  set.seed(42)  # Set seed for reproducibility
  
  results_df <- results_df %>%
    mutate(
      pval_text = paste0("p=", sapply(p.value, format_p_value)),  # Format p-values
      sig = ifelse(is.na(p.value) | p.value >= 0.05, "Non-Significant", "Significant"),  # Identify significance
      alpha_level = ifelse(is.na(p.value) | p.value >= 0.05, 0.3, 1),  # More transparency for non-sig points
      jitter_y = as.numeric(factor(term)) + runif(n(), -0.2, 0.2)  # Jitter y-axis values slightly
    )
  
  # Define colors for models
  model_colors <- c("Unadjusted" = "darkblue", "Adjusted" = "darkorange")
  
  # Define shading by creating an index for each predictor group
  results_df <- results_df %>%
    arrange(term) %>%
    mutate(group = as.integer(factor(term)) %% 2)  # Alternating shading
  
  
  # Determine x position for p-values (offset to the right of max estimate)
  x_max <- max(results_df$conf.high, na.rm = TRUE)  # Get max confidence interval upper bound
  results_df <- results_df %>%
    mutate(pval_x_pos = x_max + x_adj)  # Place p-values slightly to the right of max x range
  
  # Dot-and-whisker plot with enhancements
  p2 <- ggplot(results_df, aes(x = estimate, y = term, color = Model, alpha = alpha_level)) +
    # Add confidence intervals as horizontal whiskers
    geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = 0.2, 
                   position = position_dodge(width = 0.5)) + 
    # Add dots for coefficient estimates
    geom_point(position = position_dodge(width = 0.5), size = 3) + 
    # Add background shading for alternate rows
    geom_rect(data = results_df, aes(ymin = as.numeric(factor(term)) - 0.5, 
                                     ymax = as.numeric(factor(term)) + 0.5,
                                     xmin = -Inf, xmax = Inf, 
                                     fill = factor(group)),
              inherit.aes = FALSE, alpha = 0.1) +
    # Add vertical line at zero (null effect)
    geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") + 
    # Add jittered p-values at right side
    geom_text(aes(x = pval_x_pos, y = jitter_y, label = pval_text, 
                  fontface = ifelse(sig == "Significant", "bold", "plain")), 
              hjust = 1, size = 3) + 
    scale_color_manual(values = model_colors) +  # Set custom colors for models
    scale_fill_manual(values = c("white", "gray90")) +  # Alternating shading
    scale_alpha_identity() +  # Use manually set alpha levels
    theme_minimal() +
    labs(title = plot_title,
         x = "Estimate (Effect Size)",
         y = "Predictor",
         color = "Model") +  
   theme(legend.position = "right",  # Move legend to the right
          panel.grid.major = element_blank(),  # Remove major grid lines for clarity
          panel.grid.minor = element_blank())
    

  return(p2)
  
}

Load and format In-Person Video Data

All videos included in analysis. Videos were included based on if a walking segment could be identified. Some participants may have both a fast walk and preferred walking speed video. Some may have just fast walk or just preferred walking speed.

Each row = 1 video

Preferred walking speed

# Preferred Walking Speed 
zeno_pws_path <- file.path("C:/Users/mmccu/Box/MM_Personal/5_Projects/BoveLab/3_Data_and_Code/gait_bw_zeno_home_analysis", 
                           metrics_version, 
                           "000_merged_cleaned_data",
                           "zv_bw_merged_gait_vertical_PWS_1_clean.csv")
zeno_pws_df <- read.csv(zeno_pws_path)
table(zeno_pws_df$task_pose)
## 
## gait_vertical_PWS_1 
##                 224

White Not Hispanic as reference because highest %, rest alphabetical

# assign levels to categorical variables 
table(zeno_pws_df$race_ethnicity_clean)
## 
##                     Asian Black Or African American        Hispanic or Latino 
##                        17                        14                        21 
##    Other/Unknown/Declined        White Not Hispanic 
##                        18                       154
zeno_pws_df$race_ethnicity_clean <- factor(zeno_pws_df$race_ethnicity_clean, 
                                           levels = c('White Not Hispanic', 
                                                      'Asian', 
                                                      'Black Or African American',
                                                      'Hispanic or Latino',
                                                      'Other/Unknown/Declined'), 
                                           ordered = FALSE)
print(levels(zeno_pws_df$race_ethnicity_clean))
## [1] "White Not Hispanic"        "Asian"                    
## [3] "Black Or African American" "Hispanic or Latino"       
## [5] "Other/Unknown/Declined"
table(zeno_pws_df$ms_dx_condensed)
## 
## MS, Subtype Not Specified            Progressive MS                      RRMS 
##                         3                        40                       181
zeno_pws_df$ms_dx_condensed <- factor(zeno_pws_df$ms_dx_condensed, 
                                           levels = c('RRMS', 
                                                      'Progressive MS',
                                                      'MS, Subtype Not Specified'), 
                                           ordered = FALSE)
print(levels(zeno_pws_df$ms_dx_condensed))
## [1] "RRMS"                      "Progressive MS"           
## [3] "MS, Subtype Not Specified"
table(zeno_pws_df$clean_sex)
## 
##     Female       Male Non-Binary 
##        168         54          2
zeno_pws_df$clean_sex <- factor(zeno_pws_df$clean_sex, 
                                           levels = c('Female', 
                                                      'Male',
                                                      'Non-Binary'), 
                                           ordered = FALSE)
print(levels(zeno_pws_df$clean_sex))
## [1] "Female"     "Male"       "Non-Binary"

Drop healthy controls - no EDSS and T25FW

zeno_pws_df <- zeno_pws_df[grepl('MS', zeno_pws_df$demographic_diagnosis), ]
table(zeno_pws_df$demographic_diagnosis)
## 
##  MS 
## 224
# convert infinite values to NA 
zeno_pws_df[] <- lapply(zeno_pws_df, function(x) {
  if (is.numeric(x)) replace(x, is.infinite(x), NA) else x
})
# filter to unique IDs - one row per person, select first video 
nrow(zeno_pws_df)
## [1] 224
zeno_pws_uniqueid_df <- zeno_pws_df %>%
  arrange(visit_date_video) %>%   # Sort by date
  distinct(bw_id, .keep_all = TRUE)  # Keep the first occurrence of each ID

nrow(zeno_pws_uniqueid_df)
## [1] 154

Missing Data

# count number of missing variables in preferred walking speed data frame 
sum(is.na(zeno_pws_df$delta_pix_h_rel_median_pose_zv))
## [1] 16
sum(is.na(zeno_pws_df$stride_time_mean_sec_pose_zv))
## [1] 48
sum(is.na(zeno_pws_df$mean_cadence_step_per_min_pose_zv))
## [1] 40
sum(is.na(zeno_pws_df$stride_width_mean_cm_pose_zv))
## [1] 40
# stance and all double support measures 
sum(is.na(zeno_pws_df$foot1_gait_cycle_time_mean_pose_zv))
## [1] 163
# ground truth preferred walk zeno mat data 
sum(is.na(zeno_pws_df$PWS_cadencestepsminmean))
## [1] 0
sum(is.na(zeno_pws_df$bingoEHR_EDSS_measure_value))
## [1] 0
sum(is.na(zeno_pws_df$msfcEHR_T25FW.SPEED.AVG))
## [1] 0
# demographics 
sum(is.na(zeno_pws_df$demoEHR_Age))
## [1] 0
sum(is.na(zeno_pws_df$demoEHR_DiseaseDuration)) 
## [1] 0
sum(is.na(zeno_pws_df$ms_dx_condensed))
## [1] 0
sum(is.na(zeno_pws_df$clean_ethnicity))
## [1] 0
sum(is.na(zeno_pws_df$clean_sex))
## [1] 0
sum(is.na(zeno_pws_df$ms_dx_condensed))
## [1] 0

Fast walking speed videos

zeno_fw_path <- file.path("C:/Users/mmccu/Box/MM_Personal/5_Projects/BoveLab/3_Data_and_Code/gait_bw_zeno_home_analysis", 
                          metrics_version, 
                          "000_merged_cleaned_data", 
                          "zv_bw_merged_gait_vertical_FW_1_clean.csv")
zeno_fw_df <- read.csv(zeno_fw_path)
table(zeno_fw_df$task_pose)
## 
## gait_vertical_FW_1 
##                222
# assign levels to categorical variables 
table(zeno_fw_df$race_ethnicity_clean)
## 
##                     Asian Black Or African American        Hispanic or Latino 
##                        17                        14                        20 
##    Other/Unknown/Declined        White Not Hispanic 
##                        18                       153
zeno_fw_df$race_ethnicity_clean <- factor(zeno_fw_df$race_ethnicity_clean, 
                                           levels = c('White Not Hispanic', 
                                                      'Asian', 
                                                      'Black Or African American',
                                                      'Hispanic or Latino',
                                                      'Other/Unknown/Declined'), 
                                           ordered = FALSE)
print(levels(zeno_fw_df$race_ethnicity_clean))
## [1] "White Not Hispanic"        "Asian"                    
## [3] "Black Or African American" "Hispanic or Latino"       
## [5] "Other/Unknown/Declined"
table(zeno_fw_df$ms_dx_condensed)
## 
## MS, Subtype Not Specified            Progressive MS                      RRMS 
##                         2                        39                       181
zeno_fw_df$ms_dx_condensed <- factor(zeno_fw_df$ms_dx_condensed, 
                                           levels = c('RRMS', 
                                                      'Progressive MS',
                                                      'MS, Subtype Not Specified'), 
                                           ordered = FALSE)
print(levels(zeno_fw_df$ms_dx_condensed))
## [1] "RRMS"                      "Progressive MS"           
## [3] "MS, Subtype Not Specified"
table(zeno_fw_df$clean_sex)
## 
##     Female       Male Non-Binary 
##        167         53          2
zeno_fw_df$clean_sex <- factor(zeno_fw_df$clean_sex, 
                                           levels = c('Female', 
                                                      'Male',
                                                      'Non-Binary'), 
                                           ordered = FALSE)
print(levels(zeno_fw_df$clean_sex))
## [1] "Female"     "Male"       "Non-Binary"
zeno_fw_df <- zeno_fw_df[grepl('MS', zeno_fw_df$demographic_diagnosis), ]
table(zeno_fw_df$demographic_diagnosis)
## 
##  MS 
## 222
# convert infinite values to NA 
zeno_fw_df[] <- lapply(zeno_fw_df, function(x) {
  if (is.numeric(x)) replace(x, is.infinite(x), NA) else x
})
# filter to unique IDs - one row per person, select first video 
nrow(zeno_fw_df)
## [1] 222
zeno_fw_uniqueid_df <- zeno_fw_df %>%
  arrange(visit_date_video) %>%   # Sort by date
  distinct(bw_id, .keep_all = TRUE)  # Keep the first occurrence of each ID

nrow(zeno_fw_uniqueid_df)
## [1] 154
# count number of missing variables in fast walking speed data frame 
sum(is.na(zeno_fw_df$delta_pix_h_rel_median_pose_zv))
## [1] 3
sum(is.na(zeno_fw_df$stride_time_mean_sec_pose_zv))
## [1] 53
sum(is.na(zeno_fw_df$mean_cadence_step_per_min_pose_zv))
## [1] 46
sum(is.na(zeno_fw_df$stride_width_mean_cm_pose_zv))
## [1] 47
# stance and all double support measures 
sum(is.na(zeno_fw_df$foot1_gait_cycle_time_mean_pose_zv))
## [1] 188
# ground truth preferred walk zeno mat data 
sum(is.na(zeno_fw_df$PWS_cadencestepsminmean))
## [1] 0
sum(is.na(zeno_fw_df$bingoEHR_EDSS_measure_value))
## [1] 0
sum(is.na(zeno_fw_df$msfcEHR_T25FW.SPEED.AVG))
## [1] 0
# demographics 
sum(is.na(zeno_fw_df$demoEHR_Age))
## [1] 0
sum(is.na(zeno_fw_df$demoEHR_DiseaseDuration)) 
## [1] 0
sum(is.na(zeno_fw_df$race_ethnicity_clean))
## [1] 0
sum(is.na(zeno_fw_df$clean_sex))
## [1] 0
sum(is.na(zeno_fw_df$ms_dx_condensed))
## [1] 0

Load and format home video data

Some participants have videos from multiple timepoints (baseline and follow up) At each timepoint, participants sent back two videos - one turning to the right (gait_vertical_right task) and one turning to the left (gait_vertical_left task)

home_path <- file.path("C:/Users/mmccu/Box/MM_Personal/5_Projects/BoveLab/3_Data_and_Code/gait_bw_zeno_home_analysis",
                       metrics_version, 
                       "000_merged_cleaned_data", 
                       "hv_bw_merged_clean.csv")
home_df <- read.csv(home_path)
nrow(home_df)
## [1] 65
table(home_df$demographic_diagnosis)
## 
## MS 
## 65
table(home_df$task_pose_hv)
## 
##  gait_vertical_left gait_vertical_right 
##                  32                  33
# assign levels to categorical variables 
table(home_df$race_ethnicity_clean)
## 
##                     Asian Black Or African American        Hispanic or Latino 
##                         4                         2                         2 
##    Other/Unknown/Declined        White Not Hispanic 
##                         9                        48
home_df$race_ethnicity_clean <- factor(home_df$race_ethnicity_clean, 
                                            levels = c('White Not Hispanic', 
                                                      'Asian', 
                                                      'Black Or African American',
                                                      'Hispanic or Latino',
                                                      'Other/Unknown/Declined'),  
                                           ordered = FALSE)
print(levels(home_df$race_ethnicity_clean))
## [1] "White Not Hispanic"        "Asian"                    
## [3] "Black Or African American" "Hispanic or Latino"       
## [5] "Other/Unknown/Declined"
table(home_df$ms_dx_condensed)
## 
## MS, Subtype Not Specified            Progressive MS                      RRMS 
##                         4                         9                        52
home_df$ms_dx_condensed <- factor(home_df$ms_dx_condensed, 
                                           levels = c('RRMS', 
                                                      'Progressive MS',
                                                      'MS, Subtype Not Specified'), 
                                           ordered = FALSE)
print(levels(home_df$ms_dx_condensed))
## [1] "RRMS"                      "Progressive MS"           
## [3] "MS, Subtype Not Specified"
table(home_df$clean_sex)
## 
##     Female       Male Non-Binary 
##         52         11          2
home_df$clean_sex <- factor(home_df$clean_sex, 
                                           levels = c('Female', 
                                                      'Male',
                                                      'Non-Binary'), 
                                           ordered = FALSE)
print(levels(home_df$clean_sex))
## [1] "Female"     "Male"       "Non-Binary"
# convert infinite values to NA 
home_df[] <- lapply(home_df, function(x) {
  if (is.numeric(x)) replace(x, is.infinite(x), NA) else x
})

Group by left and right turning videos, unique IDs

# right turning videos  
home_r_df <- home_df[grepl('gait_vertical_right', home_df$task_pose_hv), ]
nrow(home_r_df)
## [1] 33
table(home_r_df$task_pose_hv)
## 
## gait_vertical_right 
##                  33
# left turning videos 
home_l_df <- home_df[grepl('gait_vertical_left', home_df$task_pose_hv), ]
nrow(home_l_df)
## [1] 32
table(home_l_df$task_pose_hv)
## 
## gait_vertical_left 
##                 32
# filter to unique IDs - one row per person, select first video 
home_uniqueid_df <- home_df %>%
  arrange(visit_date_video) %>%   # Sort by date
  distinct(bw_id, .keep_all = TRUE)  # Keep the first occurrence of each ID

nrow(home_uniqueid_df)
## [1] 31
# count number of missing variables in home video data frame 
sum(is.na(home_df$delta_pix_h_rel_median_pose_hv))
## [1] 4
sum(is.na(home_df$stride_time_mean_sec_pose_hv))
## [1] 13
sum(is.na(home_df$mean_cadence_step_per_min_pose_hv))
## [1] 12
sum(is.na(home_df$stride_width_mean_cm_pose_hv))
## [1] 12
# stance and all double support measures 
sum(is.na(home_df$foot1_gait_cycle_time_mean_pose_hv))
## [1] 34
# ground truth preferred walk zeno mat data 
sum(is.na(home_df$PWS_cadencestepsminmean))
## [1] 0
sum(is.na(home_df$bingoEHR_EDSS_measure_value))
## [1] 0
sum(is.na(home_df$msfcEHR_T25FW.SPEED.AVG))
## [1] 0
# demographics 
sum(is.na(home_df$demoEHR_Age))
## [1] 0
sum(is.na(home_df$demoEHR_DiseaseDuration)) 
## [1] 0
sum(is.na(home_df$race_ethnicity_clean))
## [1] 0
sum(is.na(home_df$clean_sex))
## [1] 0
sum(is.na(home_df$ms_dx_condensed))
## [1] 0

T25FW Linear Regression

Transform T25FW

PWS, FW, and Home Videos

# preferred walking speed videos 
zeno_pws_df$t25fw_log <- log(zeno_pws_df$msfcEHR_T25FW.SPEED.AVG)

ggplot(data = zeno_pws_df, mapping = aes(msfcEHR_T25FW.SPEED.AVG)) + 
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data = zeno_pws_df, mapping = aes(t25fw_log)) + 
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# fast walking speed videos 
zeno_fw_df$t25fw_log <- log(zeno_fw_df$msfcEHR_T25FW.SPEED.AVG)

ggplot(data = zeno_fw_df, mapping = aes(msfcEHR_T25FW.SPEED.AVG)) + 
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data = zeno_fw_df, mapping = aes(t25fw_log)) + 
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# home - all videos  

home_df$t25fw_log <- log(home_df$msfcEHR_T25FW.SPEED.AVG)

ggplot(data = home_df, mapping = aes(msfcEHR_T25FW.SPEED.AVG)) + 
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data = home_df, mapping = aes(t25fw_log)) + 
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Log Delta Pixel Proxy

zeno_pws_df$log_delta_pix_h_rel_median_pose_zv <- log(zeno_pws_df$delta_pix_h_rel_median_pose_zv)
zeno_fw_df$log_delta_pix_h_rel_median_pose_zv <- log(zeno_fw_df$delta_pix_h_rel_median_pose_zv)
home_df$log_delta_pix_h_rel_median_pose_hv <- log(home_df$delta_pix_h_rel_median_pose_hv)

# convert inf to NaN 
zeno_pws_df[] <- lapply(zeno_pws_df, function(x) {
  if (is.numeric(x)) replace(x, is.infinite(x), NA) else x
})

zeno_fw_df[] <- lapply(zeno_fw_df, function(x) {
  if (is.numeric(x)) replace(x, is.infinite(x), NA) else x
})

home_df[] <- lapply(home_df, function(x) {
  if (is.numeric(x)) replace(x, is.infinite(x), NA) else x
})

Preferred Walking Speed –> T25FW

Demographics univariate models –> Log T25FW

# PWS ------------------------------------------
uni1 <- lm(t25fw_log ~ demoEHR_Age, data = zeno_pws_df)
hist(resid(uni1)) 

uni2 <- lm(t25fw_log ~ demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(uni2)) 

uni3 <- lm(t25fw_log ~ ms_dx_condensed, data = zeno_pws_df)
hist(resid(uni3)) 

uni4 <- lm(t25fw_log ~ race_ethnicity_clean, data = zeno_pws_df)
hist(resid(uni4)) 

uni5 <- lm(t25fw_log ~ clean_sex, data = zeno_pws_df)
hist(resid(uni5)) 

## Extract regression results from each model 
results <- bind_rows(
  tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "demoEHR_Age", 
                                         AdjRsquared = summary(uni1)$adj.r.squared),
  tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "demoEHR_DiseaseDuration", 
                                         AdjRsquared = summary(uni2)$adj.r.squared),
  tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "ms_dx_condensed", 
                                         AdjRsquared = summary(uni3)$adj.r.squared),
  tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "race_ethnicity_clean", 
                                         AdjRsquared = summary(uni4)$adj.r.squared),
  tidy(uni5, conf.int = TRUE) %>% mutate(Variable = "clean_sex", 
                                         AdjRsquared = summary(uni5)$adj.r.squared)) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

adj_rsqu_df <- results[c("Variable","AdjRsquared")] %>% 
  distinct() # remove duplicates 
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 5 x 2
##   Variable                AdjRsquared
##   <chr>                         <dbl>
## 1 demoEHR_Age                    0.09
## 2 demoEHR_DiseaseDuration        0   
## 3 ms_dx_condensed                0.22
## 4 race_ethnicity_clean           0.02
## 5 clean_sex                      0
write.csv(adj_rsqu_df, file.path(output_dir, "t25fw_pws_demographics_univariate_regression_adjR.csv"))

# Plot 
title <- "PWS: Log T25FW ~ Demographic/MS Info"
p <- univariate_regression_plot(results, title, 1)
p

ggsave(file.path(output_dir, "t25fw_pws_demographics_univariate_regression_estimates.png"), 
       bg = "white", width= 10, height=10)

Metric univariate models - PWS video metrics

### Univariate video metrics 
uni1 <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv, data = zeno_pws_df)
ggplot(data = zeno_pws_df, aes(x = log_delta_pix_h_rel_median_pose_zv, y = t25fw_log)) + geom_point()
## Warning: Removed 16 rows containing missing values (`geom_point()`).

hist(resid(uni1))

uni2 <- lm(t25fw_log ~ stride_time_median_sec_pose_zv, data = zeno_pws_df)
ggplot(data = zeno_pws_df, aes(x = stride_time_median_sec_pose_zv, y = t25fw_log)) + geom_point()
## Warning: Removed 48 rows containing missing values (`geom_point()`).

hist(resid(uni2))

uni3 <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv, data = zeno_pws_df)
ggplot(data = zeno_pws_df, aes(x = mean_cadence_step_per_min_pose_zv, y = t25fw_log)) + geom_point()
## Warning: Removed 40 rows containing missing values (`geom_point()`).

hist(resid(uni3))

uni4 <- lm(t25fw_log ~ stride_width_median_cm_pose_zv, data = zeno_pws_df)
ggplot(data = zeno_pws_df, aes(x = stride_width_median_cm_pose_zv, y = t25fw_log)) + geom_point()
## Warning: Removed 40 rows containing missing values (`geom_point()`).

hist(resid(uni4))

uni_results <- bind_rows(
  tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv", 
                                         AdjRsquared = summary(uni1)$adj.r.squared),
  tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv", 
                                         AdjRsquared = summary(uni2)$adj.r.squared),
  tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv",
                                         AdjRsquared = summary(uni3)$adj.r.squared),
  tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv",
                                         AdjRsquared = summary(uni4)$adj.r.squared)) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

uni_results <- uni_results %>% mutate(Model = "Unadjusted")

# print adjusted R squared 
adj_rsqu_df <- uni_results[c("Variable", "AdjRsquared")] %>% 
  distinct() # remove duplicates 
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 4 x 2
##   Variable                           AdjRsquared
##   <chr>                                    <dbl>
## 1 log_delta_pix_h_rel_median_pose_zv        0.34
## 2 stride_time_median_sec_pose_zv            0.14
## 3 mean_cadence_step_per_min_pose_zv         0.13
## 4 stride_width_median_cm_pose_zv            0.08
write.csv(adj_rsqu_df, file.path(output_dir, "t25fw_pws_metrics_univariate_regression_adjR.csv")) 

p <- univariate_regression_plot(uni_results, 'PWS: Log T25FW ~ Metric', 1)
p

ggsave(file.path(output_dir, "t25fw_pws_metrics_univariate_regression_estimates.png"), 
       bg = "white", width= 10, height=10)

Single Metric, adjust 1 confounder - PWS video metrics

## age ---------------------------------------------------
con1_age <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + demoEHR_Age, data = zeno_pws_df)
hist(resid(con1_age))

con2_age <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + demoEHR_Age, data = zeno_pws_df)
hist(resid(con2_age))

con3_age <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + demoEHR_Age, data = zeno_pws_df)
hist(resid(con3_age))

con4_age <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + demoEHR_Age, data = zeno_pws_df)
hist(resid(con4_age))

con_age_results <- bind_rows(
  tidy(con1_age, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_age, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_age, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_age, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("demoEHR_Age")  # Define confounders to remove

con_age_results <- con_age_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_age_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~ -0.480     0.0474     -10.1  8.55e-20  -0.574   -0.387   log_del~ Adju~
## 2 strid~  0.878     0.157        5.59 8.81e- 8   0.568    1.19    stride_~ Adju~
## 3 mean_~ -0.00976   0.00175     -5.57 9.28e- 8  -0.0132  -0.00630 mean_ca~ Adju~
## 4 strid~  0.0312    0.00810      3.85 1.65e- 4   0.0152   0.0471  stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_age <- bind_rows(uni_results, con_age_results)
results_age
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~ -0.507     0.0491     -10.3  2.23e-20  -0.604   -0.410   log_del~
## 2 stride_time~  0.866     0.162        5.34 2.92e- 7   0.545    1.19    stride_~
## 3 mean_cadenc~ -0.00976   0.00183     -5.35 2.65e- 7  -0.0134  -0.00616 mean_ca~
## 4 stride_widt~  0.0337    0.00832      4.05 7.53e- 5   0.0173   0.0501  stride_~
## 5 log_delta_p~ -0.480     0.0474     -10.1  8.55e-20  -0.574   -0.387   log_del~
## 6 stride_time~  0.878     0.157        5.59 8.81e- 8   0.568    1.19    stride_~
## 7 mean_cadenc~ -0.00976   0.00175     -5.57 9.28e- 8  -0.0132  -0.00630 mean_ca~
## 8 stride_widt~  0.0312    0.00810      3.85 1.65e- 4   0.0152   0.0471  stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_age, 'PWS: Log T25FW ~ Metric + demoEHR_Age', 1)
p

ggsave(file.path(output_dir, "t25fw_pws_metric_regression_adj_age.png"), 
       bg = "white", width= 10, height=10)
## disease duration -----------------------------------------------------
con1_dur <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(con1_dur))

con2_dur <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(con2_dur))

con3_dur <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(con3_dur))

con4_dur <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(con4_dur))

con_dur_results <- bind_rows(
  tidy(con1_dur, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_dur, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_dur, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_dur, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("demoEHR_DiseaseDuration")  # Define confounders to remove

con_dur_results
## # A tibble: 8 x 8
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~ -0.504     0.0492     -10.2  3.87e-20 -6.01e-1  -0.407   log_del~
## 2 demoEHR_Dis~  0.00324   0.00316      1.03 3.06e- 1 -2.99e-3   0.00946 log_del~
## 3 stride_time~  0.891     0.159        5.59 8.81e- 8  5.76e-1   1.21    stride_~
## 4 demoEHR_Dis~  0.00830   0.00298      2.79 5.93e- 3  2.42e-3   0.0142  stride_~
## 5 mean_cadenc~ -0.0101    0.00181     -5.61 7.54e- 8 -1.37e-2  -0.00656 mean_ca~
## 6 demoEHR_Dis~  0.00812   0.00326      2.49 1.35e- 2  1.70e-3   0.0145  mean_ca~
## 7 stride_widt~  0.0327    0.00829      3.95 1.13e- 4  1.64e-2   0.0491  stride_~
## 8 demoEHR_Dis~  0.00573   0.00338      1.69 9.20e- 2 -9.45e-4   0.0124  stride_~
con_dur_results <- con_dur_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_dur_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~  -0.504    0.0492     -10.2  3.87e-20  -0.601   -0.407   log_del~ Adju~
## 2 strid~   0.891    0.159        5.59 8.81e- 8   0.576    1.21    stride_~ Adju~
## 3 mean_~  -0.0101   0.00181     -5.61 7.54e- 8  -0.0137  -0.00656 mean_ca~ Adju~
## 4 strid~   0.0327   0.00829      3.95 1.13e- 4   0.0164   0.0491  stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_dur <- bind_rows(uni_results, con_dur_results)
results_dur
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~ -0.507     0.0491     -10.3  2.23e-20  -0.604   -0.410   log_del~
## 2 stride_time~  0.866     0.162        5.34 2.92e- 7   0.545    1.19    stride_~
## 3 mean_cadenc~ -0.00976   0.00183     -5.35 2.65e- 7  -0.0134  -0.00616 mean_ca~
## 4 stride_widt~  0.0337    0.00832      4.05 7.53e- 5   0.0173   0.0501  stride_~
## 5 log_delta_p~ -0.504     0.0492     -10.2  3.87e-20  -0.601   -0.407   log_del~
## 6 stride_time~  0.891     0.159        5.59 8.81e- 8   0.576    1.21    stride_~
## 7 mean_cadenc~ -0.0101    0.00181     -5.61 7.54e- 8  -0.0137  -0.00656 mean_ca~
## 8 stride_widt~  0.0327    0.00829      3.95 1.13e- 4   0.0164   0.0491  stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dur, 'PWS: Log T25FW ~ Metric + demoEHR_DiseaseDuration', 1)
p

ggsave(file.path(output_dir, "t25fw_pws_metric_regression_adj_duration.png"), 
       bg = "white", width= 10, height=10)
## MS DX ---------------------------------------------------------------
con1_dx <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + ms_dx_condensed, data = zeno_pws_df)
hist(resid(con1_dx))

con2_dx <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + ms_dx_condensed, data = zeno_pws_df)
hist(resid(con2_dx))

con3_dx <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + ms_dx_condensed, data = zeno_pws_df)
hist(resid(con3_dx))

con4_dx <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + ms_dx_condensed, data = zeno_pws_df)
hist(resid(con4_dx))

con_dx_results <- bind_rows(
  tidy(con1_dx, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_dx, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_dx, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_dx, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("ms_dx_condensed")  # Define confounders to remove

con_dx_results
## # A tibble: 12 x 8
##    term        estimate std.error statistic  p.value conf.low conf.high Variable
##    <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
##  1 log_delta_~ -0.390     0.0509     -7.66  7.17e-13 -0.490    -0.290   log_del~
##  2 ms_dx_cond~  0.379     0.0692      5.48  1.24e- 7  0.243     0.516   log_del~
##  3 ms_dx_cond~  0.109     0.246       0.443 6.58e- 1 -0.376     0.594   log_del~
##  4 stride_tim~  0.725     0.163       4.45  1.54e- 5  0.404     1.05    stride_~
##  5 ms_dx_cond~  0.274     0.0696      3.94  1.18e- 4  0.137     0.412   stride_~
##  6 ms_dx_cond~ -0.0776    0.188      -0.413 6.80e- 1 -0.448     0.293   stride_~
##  7 mean_caden~ -0.00752   0.00176    -4.27  3.17e- 5 -0.0110   -0.00404 mean_ca~
##  8 ms_dx_cond~  0.372     0.0708      5.26  4.03e- 7  0.233     0.512   mean_ca~
##  9 ms_dx_cond~  0.0467    0.200       0.233 8.16e- 1 -0.349     0.442   mean_ca~
## 10 stride_wid~  0.0214    0.00813     2.63  9.31e- 3  0.00533   0.0374  stride_~
## 11 ms_dx_cond~  0.390     0.0739      5.28  3.74e- 7  0.244     0.535   stride_~
## 12 ms_dx_cond~  0.0554    0.206       0.268 7.89e- 1 -0.352     0.463   stride_~
con_dx_results <- con_dx_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_dx_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~ -0.390     0.0509      -7.66 7.17e-13 -0.490    -0.290   log_del~ Adju~
## 2 strid~  0.725     0.163        4.45 1.54e- 5  0.404     1.05    stride_~ Adju~
## 3 mean_~ -0.00752   0.00176     -4.27 3.17e- 5 -0.0110   -0.00404 mean_ca~ Adju~
## 4 strid~  0.0214    0.00813      2.63 9.31e- 3  0.00533   0.0374  stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_dx <- bind_rows(uni_results, con_dx_results)
results_dx
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~ -0.507     0.0491     -10.3  2.23e-20 -0.604    -0.410   log_del~
## 2 stride_time~  0.866     0.162        5.34 2.92e- 7  0.545     1.19    stride_~
## 3 mean_cadenc~ -0.00976   0.00183     -5.35 2.65e- 7 -0.0134   -0.00616 mean_ca~
## 4 stride_widt~  0.0337    0.00832      4.05 7.53e- 5  0.0173    0.0501  stride_~
## 5 log_delta_p~ -0.390     0.0509      -7.66 7.17e-13 -0.490    -0.290   log_del~
## 6 stride_time~  0.725     0.163        4.45 1.54e- 5  0.404     1.05    stride_~
## 7 mean_cadenc~ -0.00752   0.00176     -4.27 3.17e- 5 -0.0110   -0.00404 mean_ca~
## 8 stride_widt~  0.0214    0.00813      2.63 9.31e- 3  0.00533   0.0374  stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dx, 'PWS: Log T25FW ~ Metric + MS DX',1)
p

ggsave(file.path(output_dir, "t25fw_pws_metric_regression_adj_MSDX.png"), 
       bg = "white", width= 10, height=10)
## race_ethnicity -----------------------------------------------------
con1_re <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + race_ethnicity_clean, data = zeno_pws_df)
hist(resid(con1_re))

con2_re <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + race_ethnicity_clean, data = zeno_pws_df)
hist(resid(con2_re))

con3_re <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + race_ethnicity_clean, data = zeno_pws_df)
hist(resid(con3_re))

con4_re <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + race_ethnicity_clean, data = zeno_pws_df)
hist(resid(con4_re))

con_re_results <- bind_rows(
  tidy(con1_re, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_re, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_re, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_re, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("race_ethnicity_clean")  # Define confounders to remove

con_re_results
## # A tibble: 20 x 8
##    term        estimate std.error statistic  p.value conf.low conf.high Variable
##    <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
##  1 log_delta_~ -0.508     0.0482   -10.5    5.58e-21  -0.603   -0.413   log_del~
##  2 race_ethni~ -0.0273    0.0981    -0.278  7.81e- 1  -0.221    0.166   log_del~
##  3 race_ethni~  0.344     0.101      3.40   8.00e- 4   0.145    0.543   log_del~
##  4 race_ethni~  0.0238    0.0881     0.270  7.88e- 1  -0.150    0.198   log_del~
##  5 race_ethni~ -0.0557    0.0926    -0.601  5.48e- 1  -0.238    0.127   log_del~
##  6 stride_tim~  0.807     0.156      5.19   6.04e- 7   0.500    1.11    stride_~
##  7 race_ethni~  0.0896    0.0951     0.942  3.48e- 1  -0.0982   0.277   stride_~
##  8 race_ethni~  0.415     0.0955     4.35   2.38e- 5   0.227    0.603   stride_~
##  9 race_ethni~ -0.0618    0.0793    -0.779  4.37e- 1  -0.218    0.0948  stride_~
## 10 race_ethni~  0.00641   0.109      0.0589 9.53e- 1  -0.208    0.221   stride_~
## 11 mean_caden~ -0.0101    0.00179   -5.64   6.68e- 8  -0.0136  -0.00655 mean_ca~
## 12 race_ethni~  0.110     0.107      1.03   3.04e- 1  -0.100    0.320   mean_ca~
## 13 race_ethni~  0.443     0.106      4.16   4.85e- 5   0.233    0.653   mean_ca~
## 14 race_ethni~  0.0571    0.0867     0.658  5.11e- 1  -0.114    0.228   mean_ca~
## 15 race_ethni~ -0.0743    0.118     -0.632  5.28e- 1  -0.306    0.158   mean_ca~
## 16 stride_wid~  0.0298    0.00828    3.60   4.16e- 4   0.0135   0.0461  stride_~
## 17 race_ethni~  0.0462    0.112      0.412  6.81e- 1  -0.175    0.267   stride_~
## 18 race_ethni~  0.391     0.113      3.47   6.54e- 4   0.169    0.614   stride_~
## 19 race_ethni~  0.0592    0.0909     0.651  5.16e- 1  -0.120    0.239   stride_~
## 20 race_ethni~  0.0643    0.122      0.529  5.97e- 1  -0.175    0.304   stride_~
con_re_results <- con_re_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_re_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~  -0.508    0.0482     -10.5  5.58e-21  -0.603   -0.413   log_del~ Adju~
## 2 strid~   0.807    0.156        5.19 6.04e- 7   0.500    1.11    stride_~ Adju~
## 3 mean_~  -0.0101   0.00179     -5.64 6.68e- 8  -0.0136  -0.00655 mean_ca~ Adju~
## 4 strid~   0.0298   0.00828      3.60 4.16e- 4   0.0135   0.0461  stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_re <- bind_rows(uni_results, con_re_results)
results_re
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~ -0.507     0.0491     -10.3  2.23e-20  -0.604   -0.410   log_del~
## 2 stride_time~  0.866     0.162        5.34 2.92e- 7   0.545    1.19    stride_~
## 3 mean_cadenc~ -0.00976   0.00183     -5.35 2.65e- 7  -0.0134  -0.00616 mean_ca~
## 4 stride_widt~  0.0337    0.00832      4.05 7.53e- 5   0.0173   0.0501  stride_~
## 5 log_delta_p~ -0.508     0.0482     -10.5  5.58e-21  -0.603   -0.413   log_del~
## 6 stride_time~  0.807     0.156        5.19 6.04e- 7   0.500    1.11    stride_~
## 7 mean_cadenc~ -0.0101    0.00179     -5.64 6.68e- 8  -0.0136  -0.00655 mean_ca~
## 8 stride_widt~  0.0298    0.00828      3.60 4.16e- 4   0.0135   0.0461  stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_re, 'PWS: Log T25FW ~ Metric + race_ethnicity_clean', 1)
p

ggsave(file.path(output_dir, "t25fw_pws_metric_regression_adj_RaceEthnicity.png"), 
       bg = "white", width= 10, height=10)
## sex -------------------------------------------------------------------
con1_sex <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + clean_sex, data = zeno_pws_df)
hist(resid(con1_sex))

con2_sex <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + clean_sex, data = zeno_pws_df)
hist(resid(con2_sex))

con3_sex <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + clean_sex, data = zeno_pws_df)
hist(resid(con3_sex))

con4_sex <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + clean_sex, data = zeno_pws_df)
hist(resid(con4_sex))

con_sex_results <- bind_rows(
  tidy(con1_sex, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_sex, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_sex, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_sex, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("clean_sex")  # Define confounders to remove

con_sex_results
## # A tibble: 12 x 8
##    term        estimate std.error statistic  p.value conf.low conf.high Variable
##    <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
##  1 log_delta_~  -0.505    0.0495    -10.2   4.89e-20  -0.603   -0.408   log_del~
##  2 clean_sexM~  -0.0402   0.0610     -0.659 5.11e- 1  -0.161    0.0801  log_del~
##  3 clean_sexN~   0.0610   0.372       0.164 8.70e- 1  -0.672    0.794   log_del~
##  4 stride_tim~   0.877    0.163       5.39  2.26e- 7   0.556    1.20    stride_~
##  5 clean_sexM~  -0.0765   0.0571     -1.34  1.82e- 1  -0.189    0.0362  stride_~
##  6 clean_sexN~  -0.0812   0.331      -0.245 8.07e- 1  -0.735    0.573   stride_~
##  7 mean_caden~  -0.0101   0.00183    -5.51  1.23e- 7  -0.0137  -0.00646 mean_ca~
##  8 clean_sexM~  -0.118    0.0625     -1.88  6.15e- 2  -0.241    0.00573 mean_ca~
##  9 clean_sexN~  -0.0561   0.367      -0.153 8.79e- 1  -0.780    0.668   mean_ca~
## 10 stride_wid~   0.0348   0.00832     4.18  4.48e- 5   0.0184   0.0512  stride_~
## 11 clean_sexM~  -0.108    0.0645     -1.67  9.69e- 2  -0.235    0.0196  stride_~
## 12 clean_sexN~  -0.160    0.379      -0.421 6.74e- 1  -0.907    0.588   stride_~
con_sex_results <- con_sex_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_sex_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~  -0.505    0.0495     -10.2  4.89e-20  -0.603   -0.408   log_del~ Adju~
## 2 strid~   0.877    0.163        5.39 2.26e- 7   0.556    1.20    stride_~ Adju~
## 3 mean_~  -0.0101   0.00183     -5.51 1.23e- 7  -0.0137  -0.00646 mean_ca~ Adju~
## 4 strid~   0.0348   0.00832      4.18 4.48e- 5   0.0184   0.0512  stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_sex <- bind_rows(uni_results, con_sex_results)
results_sex
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~ -0.507     0.0491     -10.3  2.23e-20  -0.604   -0.410   log_del~
## 2 stride_time~  0.866     0.162        5.34 2.92e- 7   0.545    1.19    stride_~
## 3 mean_cadenc~ -0.00976   0.00183     -5.35 2.65e- 7  -0.0134  -0.00616 mean_ca~
## 4 stride_widt~  0.0337    0.00832      4.05 7.53e- 5   0.0173   0.0501  stride_~
## 5 log_delta_p~ -0.505     0.0495     -10.2  4.89e-20  -0.603   -0.408   log_del~
## 6 stride_time~  0.877     0.163        5.39 2.26e- 7   0.556    1.20    stride_~
## 7 mean_cadenc~ -0.0101    0.00183     -5.51 1.23e- 7  -0.0137  -0.00646 mean_ca~
## 8 stride_widt~  0.0348    0.00832      4.18 4.48e- 5   0.0184   0.0512  stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_sex, 'PWS: Log T25FW ~ Metric + Sex', 1)
p

ggsave(file.path(output_dir, "t25fw_pws_metric_regression_adj_sex.png"), 
       bg = "white", width= 10, height=10)

Multivariate - PWS –> T25fw

Metrics only - not including double support/stance measures, too many missing. May include after improving code

# PWS 

# Unadjusted 
all_unadj <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + 
                  stride_time_median_sec_pose_zv + 
                  mean_cadence_step_per_min_pose_zv + 
                  stride_width_median_cm_pose_zv, 
                data = zeno_pws_df)

summary(all_unadj)
## 
## Call:
## lm(formula = t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + 
##     stride_time_median_sec_pose_zv + mean_cadence_step_per_min_pose_zv + 
##     stride_width_median_cm_pose_zv, data = zeno_pws_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.80025 -0.20246 -0.04435  0.14522  1.24370 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         0.779483   0.507896   1.535 0.126863    
## log_delta_pix_h_rel_median_pose_zv -0.192139   0.055311  -3.474 0.000663 ***
## stride_time_median_sec_pose_zv      0.458203   0.232284   1.973 0.050298 .  
## mean_cadence_step_per_min_pose_zv  -0.002736   0.002682  -1.020 0.309220    
## stride_width_median_cm_pose_zv      0.026049   0.007828   3.328 0.001091 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3099 on 157 degrees of freedom
##   (62 observations deleted due to missingness)
## Multiple R-squared:  0.2909, Adjusted R-squared:  0.2729 
## F-statistic:  16.1 on 4 and 157 DF,  p-value: 4.537e-11
hist(resid(all_unadj))

# Adjusted 
all_adjusted <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + 
                     stride_time_median_sec_pose_zv + 
                     mean_cadence_step_per_min_pose_zv + 
                     stride_width_median_cm_pose_zv + 
                     demoEHR_Age + 
                     demoEHR_DiseaseDuration +
                     ms_dx_condensed + 
                     race_ethnicity_clean + 
                     clean_sex, 
                   data = zeno_pws_df)

summary(all_adjusted)
## 
## Call:
## lm(formula = t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + 
##     stride_time_median_sec_pose_zv + mean_cadence_step_per_min_pose_zv + 
##     stride_width_median_cm_pose_zv + demoEHR_Age + demoEHR_DiseaseDuration + 
##     ms_dx_condensed + race_ethnicity_clean + clean_sex, data = zeno_pws_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6386 -0.1679 -0.0270  0.1233  1.1528 
## 
## Coefficients:
##                                                Estimate Std. Error t value
## (Intercept)                                    1.021378   0.494099   2.067
## log_delta_pix_h_rel_median_pose_zv            -0.159210   0.052207  -3.050
## stride_time_median_sec_pose_zv                 0.272391   0.216581   1.258
## mean_cadence_step_per_min_pose_zv             -0.004656   0.002513  -1.853
## stride_width_median_cm_pose_zv                 0.012098   0.007641   1.583
## demoEHR_Age                                    0.006144   0.002333   2.633
## demoEHR_DiseaseDuration                        0.002141   0.003254   0.658
## ms_dx_condensedProgressive MS                  0.186514   0.075461   2.472
## ms_dx_condensedMS, Subtype Not Specified       0.004998   0.203712   0.025
## race_ethnicity_cleanAsian                      0.128675   0.097169   1.324
## race_ethnicity_cleanBlack Or African American  0.442252   0.087473   5.056
## race_ethnicity_cleanHispanic or Latino         0.073267   0.077700   0.943
## race_ethnicity_cleanOther/Unknown/Declined     0.077839   0.103577   0.752
## clean_sexMale                                 -0.074394   0.053747  -1.384
## clean_sexNon-Binary                            0.108748   0.282705   0.385
##                                               Pr(>|t|)    
## (Intercept)                                    0.04047 *  
## log_delta_pix_h_rel_median_pose_zv             0.00272 ** 
## stride_time_median_sec_pose_zv                 0.21050    
## mean_cadence_step_per_min_pose_zv              0.06591 .  
## stride_width_median_cm_pose_zv                 0.11552    
## demoEHR_Age                                    0.00937 ** 
## demoEHR_DiseaseDuration                        0.51157    
## ms_dx_condensedProgressive MS                  0.01459 *  
## ms_dx_condensedMS, Subtype Not Specified       0.98046    
## race_ethnicity_cleanAsian                      0.18748    
## race_ethnicity_cleanBlack Or African American 1.26e-06 ***
## race_ethnicity_cleanHispanic or Latino         0.34725    
## race_ethnicity_cleanOther/Unknown/Declined     0.45355    
## clean_sexMale                                  0.16841    
## clean_sexNon-Binary                            0.70104    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2784 on 147 degrees of freedom
##   (62 observations deleted due to missingness)
## Multiple R-squared:  0.4642, Adjusted R-squared:  0.4131 
## F-statistic: 9.096 on 14 and 147 DF,  p-value: 3.854e-14
hist(resid(all_adjusted))

# format for plotting 
# Unadjusted 
unadj_all_results <- tidy(all_unadj, conf.int = TRUE) %>% 
  mutate(Model = "Unadjusted", 
         AdjRsquared = summary(all_unadj)$adj.r.squared)

# Adjusted 
adj_all_results <- tidy(all_adjusted, conf.int = TRUE) %>% 
  mutate(Model = "Adjusted", 
         AdjRsquared = summary(all_adjusted)$adj.r.squared)

# combine and plot 
multivar_results <- bind_rows(unadj_all_results, adj_all_results) %>% 
  filter(term != "(Intercept)")  %>% # Remove intercept term
  mutate(priority = ifelse(grepl("zv", term), 1, 2)) %>% # assign priority to video metrics 
  arrange(priority) %>% # sort by priority 
  select(-priority) # drop priority colum
  
adj_rsqu_df <- multivar_results[c("Model", "AdjRsquared")] %>% 
  distinct() # remove duplicates 
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 2 x 2
##   Model      AdjRsquared
##   <chr>            <dbl>
## 1 Unadjusted        0.27
## 2 Adjusted          0.41
write.csv(adj_rsqu_df, file.path(output_dir, "t25fw_pws_allMetrics_unadjusted_vs_adjusted_adjR.csv"))

p <- adj_vs_unadj_regression_plot(multivar_results, 'PWS: Log T25FW Multivariate - Unadjusted vs Adjusted', 1)
p

ggsave(file.path(output_dir, "t25fw_pws_allMetrics_unadjusted_vs_adjusted.png"), 
       bg = "white", width= 10, height=10)

Fast Walking Speed –> T25fw

Demographics univariate models –> Log T25FW

# FW ------------------------------------------
uni1 <- lm(t25fw_log ~ demoEHR_Age, data = zeno_fw_df)
hist(resid(uni1)) 

uni2 <- lm(t25fw_log ~ demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(uni2)) 

uni3 <- lm(t25fw_log ~ ms_dx_condensed, data = zeno_fw_df)
hist(resid(uni3)) 

uni4 <- lm(t25fw_log ~ race_ethnicity_clean, data = zeno_fw_df)
hist(resid(uni4)) 

uni5 <- lm(t25fw_log ~ clean_sex, data = zeno_fw_df)
hist(resid(uni4)) 

## Extract regression results from each model 
results <- bind_rows(
  tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "demoEHR_Age", 
                                         AdjRsquared = summary(uni1)$adj.r.squared),
  tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "demoEHR_DiseaseDuration", 
                                         AdjRsquared = summary(uni2)$adj.r.squared),
  tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "ms_dx_condensed", 
                                         AdjRsquared = summary(uni3)$adj.r.squared),
  tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "race_ethnicity_clean", 
                                         AdjRsquared = summary(uni4)$adj.r.squared),
  tidy(uni5, conf.int = TRUE) %>% mutate(Variable = "clean_sex", 
                                         AdjRsquared = summary(uni5)$adj.r.squared)) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

adj_rsqu_df <- results[c("Variable","AdjRsquared")] %>% 
  distinct() # remove duplicates 
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 5 x 2
##   Variable                AdjRsquared
##   <chr>                         <dbl>
## 1 demoEHR_Age                    0.09
## 2 demoEHR_DiseaseDuration        0   
## 3 ms_dx_condensed                0.22
## 4 race_ethnicity_clean           0.02
## 5 clean_sex                      0
write.csv(adj_rsqu_df, file.path(output_dir, "t25fw_fw_demographics_univariate_regression_adjR.csv")) 

# Plot 
title <- "FW: Log T25FW ~ Demographic/MS Info"
p <- univariate_regression_plot(results, title, 1)
p

ggsave(file.path(output_dir, "t25fw_fw_demographics_univariate_regression_estimates.png"), 
       bg = "white", width= 10, height=10)

Metric univariate models - FW video metrics

### Univariate video metrics 
uni1 <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv, data = zeno_fw_df)
ggplot(data = zeno_fw_df, aes(x = log_delta_pix_h_rel_median_pose_zv, y = t25fw_log)) + geom_point()
## Warning: Removed 4 rows containing missing values (`geom_point()`).

hist(resid(uni1))

uni2 <- lm(t25fw_log ~ stride_time_median_sec_pose_zv, data = zeno_fw_df)
ggplot(data = zeno_fw_df, aes(x = stride_time_median_sec_pose_zv, y = t25fw_log)) + geom_point()
## Warning: Removed 53 rows containing missing values (`geom_point()`).

hist(resid(uni2))

uni3 <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv, data = zeno_fw_df)
ggplot(data = zeno_fw_df, aes(x = mean_cadence_step_per_min_pose_zv, y = t25fw_log)) + geom_point()
## Warning: Removed 46 rows containing missing values (`geom_point()`).

hist(resid(uni3))

uni4 <- lm(t25fw_log ~ stride_width_median_cm_pose_zv, data = zeno_fw_df)
ggplot(data = zeno_fw_df, aes(x = stride_width_median_cm_pose_zv, y = t25fw_log)) + geom_point()
## Warning: Removed 47 rows containing missing values (`geom_point()`).

hist(resid(uni4))

uni_results <- bind_rows(
  tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv",
                                         AdjRsquared = summary(uni1)$adj.r.squared),
  tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv",
                                         AdjRsquared = summary(uni2)$adj.r.squared),
  tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv",
                                         AdjRsquared = summary(uni3)$adj.r.squared),
  tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv",
                                         AdjRsquared = summary(uni4)$adj.r.squared)) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

uni_results <- uni_results %>% mutate(Model = "Unadjusted")
adj_rsqu_df <- uni_results[c("Variable", "AdjRsquared")] %>% 
  distinct() # remove duplicates 
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 4 x 2
##   Variable                           AdjRsquared
##   <chr>                                    <dbl>
## 1 log_delta_pix_h_rel_median_pose_zv        0.44
## 2 stride_time_median_sec_pose_zv            0.34
## 3 mean_cadence_step_per_min_pose_zv         0.28
## 4 stride_width_median_cm_pose_zv            0.02
write.csv(adj_rsqu_df, file.path(output_dir, "t25fw_fw_metrics_univariate_regression_adjR.csv"))

p <- univariate_regression_plot(uni_results, 'FW: Log T25FW ~ Metric', 1)
p

ggsave(file.path(output_dir, "t25fw_fw_metrics_univariate_regression_estimates.png"), 
       bg = "white", width= 10, height=10)

Single Metric, adjust 1 confounder - FW video metrics

## age ---------------------------------------------------
con1_age <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + demoEHR_Age, data = zeno_fw_df)
hist(resid(con1_age))

con2_age <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + demoEHR_Age, data = zeno_fw_df)
hist(resid(con2_age))

con3_age <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + demoEHR_Age, data = zeno_fw_df)
hist(resid(con3_age))

con4_age <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + demoEHR_Age, data = zeno_fw_df)
hist(resid(con4_age))

con_age_results <- bind_rows(
  tidy(con1_age, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_age, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_age, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_age, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("demoEHR_Age")  # Define confounders to remove

con_age_results <- con_age_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_age_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~  -0.507    0.0413     -12.3  1.46e-26 -0.588    -0.425   log_del~ Adju~
## 2 strid~   1.44     0.151        9.54 1.85e-17  1.14      1.74    stride_~ Adju~
## 3 mean_~  -0.0108   0.00128     -8.41 1.50e-14 -0.0133   -0.00823 mean_ca~ Adju~
## 4 strid~   0.0264   0.0117       2.25 2.56e- 2  0.00325   0.0495  stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_age <- bind_rows(uni_results, con_age_results)
results_age
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~  -0.538    0.0413     -13.0  5.54e-29 -0.619    -0.456   log_del~
## 2 stride_time~   1.47     0.156        9.40 4.06e-17  1.16      1.77    stride_~
## 3 mean_cadenc~  -0.0111   0.00133     -8.36 1.89e-14 -0.0138   -0.00851 mean_ca~
## 4 stride_widt~   0.0284   0.0122       2.33 2.07e- 2  0.00439   0.0524  stride_~
## 5 log_delta_p~  -0.507    0.0413     -12.3  1.46e-26 -0.588    -0.425   log_del~
## 6 stride_time~   1.44     0.151        9.54 1.85e-17  1.14      1.74    stride_~
## 7 mean_cadenc~  -0.0108   0.00128     -8.41 1.50e-14 -0.0133   -0.00823 mean_ca~
## 8 stride_widt~   0.0264   0.0117       2.25 2.56e- 2  0.00325   0.0495  stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_age, 'FW: Log T25FW ~ Metric + demoEHR_Age', 1)
p

ggsave(file.path(output_dir, "t25fw_fw_metric_regression_adj_age.png"), 
       bg = "white", width= 10, height=10)
## disease duration -----------------------------------------------------
con1_dur <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(con1_dur))

con2_dur <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(con2_dur))

con3_dur <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(con3_dur))

con4_dur <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(con4_dur))

con_dur_results <- bind_rows(
  tidy(con1_dur, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_dur, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_dur, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_dur, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("demoEHR_DiseaseDuration")  # Define confounders to remove

con_dur_results
## # A tibble: 8 x 8
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~ -0.536     0.0415    -12.9   1.33e-28 -6.18e-1  -0.454   log_del~
## 2 demoEHR_Dis~  0.00119   0.00283     0.420 6.75e- 1 -4.39e-3   0.00677 log_del~
## 3 stride_time~  1.49      0.153       9.69  7.06e-18  1.18e+0   1.79    stride_~
## 4 demoEHR_Dis~  0.00841   0.00316     2.66  8.47e- 3  2.18e-3   0.0146  stride_~
## 5 mean_cadenc~ -0.0113    0.00132    -8.50  8.32e-15 -1.39e-2  -0.00865 mean_ca~
## 6 demoEHR_Dis~  0.00660   0.00364     1.81  7.17e- 2 -5.89e-4   0.0138  mean_ca~
## 7 stride_widt~  0.0271    0.0123      2.21  2.84e- 2  2.89e-3   0.0513  stride_~
## 8 demoEHR_Dis~  0.00405   0.00431     0.941 3.48e- 1 -4.45e-3   0.0126  stride_~
con_dur_results <- con_dur_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_dur_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~  -0.536    0.0415     -12.9  1.33e-28 -0.618    -0.454   log_del~ Adju~
## 2 strid~   1.49     0.153        9.69 7.06e-18  1.18      1.79    stride_~ Adju~
## 3 mean_~  -0.0113   0.00132     -8.50 8.32e-15 -0.0139   -0.00865 mean_ca~ Adju~
## 4 strid~   0.0271   0.0123       2.21 2.84e- 2  0.00289   0.0513  stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_dur <- bind_rows(uni_results, con_dur_results)
results_dur
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~  -0.538    0.0413     -13.0  5.54e-29 -0.619    -0.456   log_del~
## 2 stride_time~   1.47     0.156        9.40 4.06e-17  1.16      1.77    stride_~
## 3 mean_cadenc~  -0.0111   0.00133     -8.36 1.89e-14 -0.0138   -0.00851 mean_ca~
## 4 stride_widt~   0.0284   0.0122       2.33 2.07e- 2  0.00439   0.0524  stride_~
## 5 log_delta_p~  -0.536    0.0415     -12.9  1.33e-28 -0.618    -0.454   log_del~
## 6 stride_time~   1.49     0.153        9.69 7.06e-18  1.18      1.79    stride_~
## 7 mean_cadenc~  -0.0113   0.00132     -8.50 8.32e-15 -0.0139   -0.00865 mean_ca~
## 8 stride_widt~   0.0271   0.0123       2.21 2.84e- 2  0.00289   0.0513  stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dur, 'FW: Log T25FW ~ Metric + demoEHR_DiseaseDuration', 1)
p

ggsave(file.path(output_dir, "t25fw_fw_metric_regression_adj_duration.png"), 
       bg = "white", width= 10, height=10)
## MS DX ---------------------------------------------------------------
con1_dx <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + ms_dx_condensed, data = zeno_fw_df)
hist(resid(con1_dx))

con2_dx <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + ms_dx_condensed, data = zeno_fw_df)
hist(resid(con2_dx))

con3_dx <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + ms_dx_condensed, data = zeno_fw_df)
hist(resid(con3_dx))

con4_dx <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + ms_dx_condensed, data = zeno_fw_df)
hist(resid(con4_dx))

con_dx_results <- bind_rows(
  tidy(con1_dx, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_dx, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_dx, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_dx, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("ms_dx_condensed")  # Define confounders to remove

con_dx_results
## # A tibble: 12 x 8
##    term        estimate std.error statistic  p.value conf.low conf.high Variable
##    <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
##  1 log_delta_~ -0.459     0.0421    -10.9   2.56e-22 -0.542    -0.376   log_del~
##  2 ms_dx_cond~  0.307     0.0599      5.13  6.53e- 7  0.189     0.426   log_del~
##  3 ms_dx_cond~  0.248     0.316       0.786 4.33e- 1 -0.375     0.872   log_del~
##  4 stride_tim~  1.27      0.166       7.64  1.64e-12  0.938     1.59    stride_~
##  5 ms_dx_cond~  0.242     0.0779      3.11  2.18e- 3  0.0887    0.396   stride_~
##  6 ms_dx_cond~  0.0237    0.236       0.101 9.20e- 1 -0.441     0.489   stride_~
##  7 mean_caden~ -0.00889   0.00133    -6.71  2.69e-10 -0.0115   -0.00628 mean_ca~
##  8 ms_dx_cond~  0.408     0.0799      5.11  8.55e- 7  0.251     0.566   mean_ca~
##  9 ms_dx_cond~  0.0731    0.260       0.281 7.79e- 1 -0.441     0.587   mean_ca~
## 10 stride_wid~  0.0179    0.0111      1.61  1.08e- 1 -0.00398   0.0397  stride_~
## 11 ms_dx_cond~  0.570     0.0849      6.71  2.80e-10  0.402     0.737   stride_~
## 12 ms_dx_cond~ -0.0381    0.292      -0.131 8.96e- 1 -0.614     0.538   stride_~
con_dx_results <- con_dx_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_dx_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~ -0.459     0.0421     -10.9  2.56e-22 -0.542    -0.376   log_del~ Adju~
## 2 strid~  1.27      0.166        7.64 1.64e-12  0.938     1.59    stride_~ Adju~
## 3 mean_~ -0.00889   0.00133     -6.71 2.69e-10 -0.0115   -0.00628 mean_ca~ Adju~
## 4 strid~  0.0179    0.0111       1.61 1.08e- 1 -0.00398   0.0397  stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_dx <- bind_rows(uni_results, con_dx_results)
results_dx
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~ -0.538     0.0413     -13.0  5.54e-29 -0.619    -0.456   log_del~
## 2 stride_time~  1.47      0.156        9.40 4.06e-17  1.16      1.77    stride_~
## 3 mean_cadenc~ -0.0111    0.00133     -8.36 1.89e-14 -0.0138   -0.00851 mean_ca~
## 4 stride_widt~  0.0284    0.0122       2.33 2.07e- 2  0.00439   0.0524  stride_~
## 5 log_delta_p~ -0.459     0.0421     -10.9  2.56e-22 -0.542    -0.376   log_del~
## 6 stride_time~  1.27      0.166        7.64 1.64e-12  0.938     1.59    stride_~
## 7 mean_cadenc~ -0.00889   0.00133     -6.71 2.69e-10 -0.0115   -0.00628 mean_ca~
## 8 stride_widt~  0.0179    0.0111       1.61 1.08e- 1 -0.00398   0.0397  stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dx, 'FW: Log T25FW ~ Metric + MS DX', 1)
p

ggsave(file.path(output_dir, "t25fw_fw_metric_regression_adj_MSDX.png"), 
       bg = "white", width= 10, height=10)
## race_ethnicity -----------------------------------------------------
con1_re <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + race_ethnicity_clean, data = zeno_fw_df)
hist(resid(con1_re))

con2_re <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + race_ethnicity_clean, data = zeno_fw_df)
hist(resid(con2_re))

con3_re <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + race_ethnicity_clean, data = zeno_fw_df)
hist(resid(con3_re))

con4_re <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + race_ethnicity_clean, data = zeno_fw_df)
hist(resid(con4_re))

con_re_results <- bind_rows(
  tidy(con1_re, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_re, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_re, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_re, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("race_ethnicity_clean")  # Define confounders to remove

con_re_results
## # A tibble: 20 x 8
##    term        estimate std.error statistic  p.value conf.low conf.high Variable
##    <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
##  1 log_delta_~ -5.28e-1   0.0416  -12.7     7.12e-28 -0.610    -0.446   log_del~
##  2 race_ethni~  2.73e-2   0.0871    0.314   7.54e- 1 -0.144     0.199   log_del~
##  3 race_ethni~  1.88e-1   0.0933    2.02    4.49e- 2  0.00430   0.372   log_del~
##  4 race_ethni~ -8.05e-2   0.0809   -0.995   3.21e- 1 -0.240     0.0789  log_del~
##  5 race_ethni~ -5.02e-2   0.0826   -0.607   5.44e- 1 -0.213     0.113   log_del~
##  6 stride_tim~  1.39e+0   0.155     8.95    7.51e-16  1.08      1.70    stride_~
##  7 race_ethni~  2.07e-2   0.0944    0.219   8.27e- 1 -0.166     0.207   stride_~
##  8 race_ethni~  3.22e-1   0.102     3.16    1.90e- 3  0.120     0.523   stride_~
##  9 race_ethni~  5.55e-2   0.0886    0.626   5.32e- 1 -0.119     0.231   stride_~
## 10 race_ethni~ -2.30e-2   0.101    -0.228   8.20e- 1 -0.222     0.176   stride_~
## 11 mean_caden~ -1.09e-2   0.00132  -8.23    4.83e-14 -0.0135   -0.00825 mean_ca~
## 12 race_ethni~  6.65e-4   0.108     0.00613 9.95e- 1 -0.213     0.215   mean_ca~
## 13 race_ethni~  3.55e-1   0.116     3.06    2.61e- 3  0.126     0.585   mean_ca~
## 14 race_ethni~  3.70e-2   0.0995    0.372   7.10e- 1 -0.159     0.233   mean_ca~
## 15 race_ethni~ -4.03e-2   0.116    -0.347   7.29e- 1 -0.270     0.189   mean_ca~
## 16 stride_wid~  2.10e-2   0.0126    1.67    9.64e- 2 -0.00380   0.0459  stride_~
## 17 race_ethni~  2.66e-2   0.129     0.206   8.37e- 1 -0.228     0.281   stride_~
## 18 race_ethni~  3.81e-1   0.139     2.74    6.72e- 3  0.107     0.654   stride_~
## 19 race_ethni~  7.18e-3   0.117     0.0615  9.51e- 1 -0.223     0.237   stride_~
## 20 race_ethni~ -4.67e-2   0.138    -0.339   7.35e- 1 -0.318     0.225   stride_~
con_re_results <- con_re_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_re_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~  -0.528    0.0416     -12.7  7.12e-28 -0.610    -0.446   log_del~ Adju~
## 2 strid~   1.39     0.155        8.95 7.51e-16  1.08      1.70    stride_~ Adju~
## 3 mean_~  -0.0109   0.00132     -8.23 4.83e-14 -0.0135   -0.00825 mean_ca~ Adju~
## 4 strid~   0.0210   0.0126       1.67 9.64e- 2 -0.00380   0.0459  stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_re <- bind_rows(uni_results, con_re_results)
results_re
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~  -0.538    0.0413     -13.0  5.54e-29 -0.619    -0.456   log_del~
## 2 stride_time~   1.47     0.156        9.40 4.06e-17  1.16      1.77    stride_~
## 3 mean_cadenc~  -0.0111   0.00133     -8.36 1.89e-14 -0.0138   -0.00851 mean_ca~
## 4 stride_widt~   0.0284   0.0122       2.33 2.07e- 2  0.00439   0.0524  stride_~
## 5 log_delta_p~  -0.528    0.0416     -12.7  7.12e-28 -0.610    -0.446   log_del~
## 6 stride_time~   1.39     0.155        8.95 7.51e-16  1.08      1.70    stride_~
## 7 mean_cadenc~  -0.0109   0.00132     -8.23 4.83e-14 -0.0135   -0.00825 mean_ca~
## 8 stride_widt~   0.0210   0.0126       1.67 9.64e- 2 -0.00380   0.0459  stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_re, 'FW: Log T25FW ~ Metric + race_ethnicity_clean', 1)
p

ggsave(file.path(output_dir, "t25fw_fw_metric_regression_adj_RaceEthnicity.png"), 
       bg = "white", width= 10, height=10)
## sex -------------------------------------------------------------------
con1_sex <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + clean_sex, data = zeno_fw_df)
hist(resid(con1_sex))

con2_sex <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + clean_sex, data = zeno_fw_df)
hist(resid(con2_sex))

con3_sex <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + clean_sex, data = zeno_fw_df)
hist(resid(con3_sex))

con4_sex <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + clean_sex, data = zeno_fw_df)
hist(resid(con4_sex))

con_sex_results <- bind_rows(
  tidy(con1_sex, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_sex, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_sex, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_sex, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("clean_sex")  # Define confounders to remove

con_sex_results
## # A tibble: 12 x 8
##    term        estimate std.error statistic  p.value conf.low conf.high Variable
##    <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
##  1 log_delta_~ -0.537     0.0416  -12.9     1.44e-28 -0.619    -0.455   log_del~
##  2 clean_sexM~ -0.0266    0.0540   -0.492   6.23e- 1 -0.133     0.0799  log_del~
##  3 clean_sexN~  0.0274    0.238     0.115   9.08e- 1 -0.442     0.497   log_del~
##  4 stride_tim~  1.47      0.156     9.44    3.51e-17  1.16      1.78    stride_~
##  5 clean_sexM~ -0.0873    0.0599   -1.46    1.47e- 1 -0.205     0.0309  stride_~
##  6 clean_sexN~ -0.0828    0.241    -0.344   7.32e- 1 -0.559     0.393   stride_~
##  7 mean_caden~ -0.0112    0.00133  -8.41    1.52e-14 -0.0138   -0.00856 mean_ca~
##  8 clean_sexM~ -0.116     0.0684   -1.70    9.12e- 2 -0.251     0.0188  mean_ca~
##  9 clean_sexN~  0.00268   0.277     0.00966 9.92e- 1 -0.545     0.550   mean_ca~
## 10 stride_wid~  0.0310    0.0122    2.53    1.21e- 2  0.00687   0.0552  stride_~
## 11 clean_sexM~ -0.129     0.0805   -1.60    1.12e- 1 -0.288     0.0303  stride_~
## 12 clean_sexN~ -0.153     0.324    -0.472   6.38e- 1 -0.792     0.486   stride_~
con_sex_results <- con_sex_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_sex_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~  -0.537    0.0416     -12.9  1.44e-28 -0.619    -0.455   log_del~ Adju~
## 2 strid~   1.47     0.156        9.44 3.51e-17  1.16      1.78    stride_~ Adju~
## 3 mean_~  -0.0112   0.00133     -8.41 1.52e-14 -0.0138   -0.00856 mean_ca~ Adju~
## 4 strid~   0.0310   0.0122       2.53 1.21e- 2  0.00687   0.0552  stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_sex <- bind_rows(uni_results, con_sex_results)
results_sex
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~  -0.538    0.0413     -13.0  5.54e-29 -0.619    -0.456   log_del~
## 2 stride_time~   1.47     0.156        9.40 4.06e-17  1.16      1.77    stride_~
## 3 mean_cadenc~  -0.0111   0.00133     -8.36 1.89e-14 -0.0138   -0.00851 mean_ca~
## 4 stride_widt~   0.0284   0.0122       2.33 2.07e- 2  0.00439   0.0524  stride_~
## 5 log_delta_p~  -0.537    0.0416     -12.9  1.44e-28 -0.619    -0.455   log_del~
## 6 stride_time~   1.47     0.156        9.44 3.51e-17  1.16      1.78    stride_~
## 7 mean_cadenc~  -0.0112   0.00133     -8.41 1.52e-14 -0.0138   -0.00856 mean_ca~
## 8 stride_widt~   0.0310   0.0122       2.53 1.21e- 2  0.00687   0.0552  stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_sex, 'FW: Log T25FW ~ Metric + Sex', 1)
p

ggsave(file.path(output_dir, "t25fw_fw_metric_regression_adj_sex.png"), 
       bg = "white", width= 10, height=10)

Multivariate - FW –> T25fw

Metrics only - not including double support/stance measures, too many missing. May include after improving code

# FW 

# Unadjusted 
all_unadj <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + 
                  stride_time_median_sec_pose_zv + 
                  mean_cadence_step_per_min_pose_zv + 
                  stride_width_median_cm_pose_zv, 
                data = zeno_fw_df)

summary(all_unadj)
## 
## Call:
## lm(formula = t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + 
##     stride_time_median_sec_pose_zv + mean_cadence_step_per_min_pose_zv + 
##     stride_width_median_cm_pose_zv, data = zeno_fw_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7376 -0.1609 -0.0209  0.1198  1.4365 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         1.286575   0.379772   3.388 0.000885 ***
## log_delta_pix_h_rel_median_pose_zv -0.362830   0.050788  -7.144 2.92e-11 ***
## stride_time_median_sec_pose_zv      0.521656   0.206156   2.530 0.012347 *  
## mean_cadence_step_per_min_pose_zv  -0.004323   0.001743  -2.480 0.014167 *  
## stride_width_median_cm_pose_zv     -0.001463   0.008556  -0.171 0.864450    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2878 on 162 degrees of freedom
##   (55 observations deleted due to missingness)
## Multiple R-squared:  0.5403, Adjusted R-squared:  0.5289 
## F-statistic:  47.6 on 4 and 162 DF,  p-value: < 2.2e-16
hist(resid(all_unadj))

# Adjusted 
all_adjusted <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + 
                     stride_time_median_sec_pose_zv + 
                     mean_cadence_step_per_min_pose_zv + 
                     stride_width_median_cm_pose_zv + 
                     demoEHR_Age + 
                     demoEHR_DiseaseDuration +
                     ms_dx_condensed + 
                     race_ethnicity_clean + 
                     clean_sex, 
                   data = zeno_fw_df)

summary(all_adjusted)
## 
## Call:
## lm(formula = t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + 
##     stride_time_median_sec_pose_zv + mean_cadence_step_per_min_pose_zv + 
##     stride_width_median_cm_pose_zv + demoEHR_Age + demoEHR_DiseaseDuration + 
##     ms_dx_condensed + race_ethnicity_clean + clean_sex, data = zeno_fw_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.62995 -0.14801 -0.00986  0.13401  1.22547 
## 
## Coefficients:
##                                                 Estimate Std. Error t value
## (Intercept)                                    1.2167389  0.3792645   3.208
## log_delta_pix_h_rel_median_pose_zv            -0.3157477  0.0498813  -6.330
## stride_time_median_sec_pose_zv                 0.3987015  0.1985435   2.008
## mean_cadence_step_per_min_pose_zv             -0.0044988  0.0016791  -2.679
## stride_width_median_cm_pose_zv                -0.0086240  0.0085587  -1.008
## demoEHR_Age                                    0.0056733  0.0021890   2.592
## demoEHR_DiseaseDuration                        0.0009481  0.0031031   0.306
## ms_dx_condensedProgressive MS                  0.1349804  0.0697888   1.934
## ms_dx_condensedMS, Subtype Not Specified       0.3094267  0.2790501   1.109
## race_ethnicity_cleanAsian                      0.1381439  0.0844554   1.636
## race_ethnicity_cleanBlack Or African American  0.3244756  0.0856144   3.790
## race_ethnicity_cleanHispanic or Latino         0.0724510  0.0764232   0.948
## race_ethnicity_cleanOther/Unknown/Declined     0.0123925  0.0883343   0.140
## clean_sexMale                                 -0.0632811  0.0512554  -1.235
## clean_sexNon-Binary                            0.1288287  0.1959085   0.658
##                                               Pr(>|t|)    
## (Intercept)                                   0.001629 ** 
## log_delta_pix_h_rel_median_pose_zv            2.62e-09 ***
## stride_time_median_sec_pose_zv                0.046401 *  
## mean_cadence_step_per_min_pose_zv             0.008191 ** 
## stride_width_median_cm_pose_zv                0.315234    
## demoEHR_Age                                   0.010481 *  
## demoEHR_DiseaseDuration                       0.760375    
## ms_dx_condensedProgressive MS                 0.054955 .  
## ms_dx_condensedMS, Subtype Not Specified      0.269243    
## race_ethnicity_cleanAsian                     0.103971    
## race_ethnicity_cleanBlack Or African American 0.000217 ***
## race_ethnicity_cleanHispanic or Latino        0.344623    
## race_ethnicity_cleanOther/Unknown/Declined    0.888616    
## clean_sexMale                                 0.218876    
## clean_sexNon-Binary                           0.511792    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.269 on 152 degrees of freedom
##   (55 observations deleted due to missingness)
## Multiple R-squared:  0.623,  Adjusted R-squared:  0.5883 
## F-statistic: 17.94 on 14 and 152 DF,  p-value: < 2.2e-16
hist(resid(all_adjusted))

# format for plotting 
# Unadjusted 
unadj_all_results <- tidy(all_unadj, conf.int = TRUE) %>% 
  mutate(Model = "Unadjusted",
         AdjRsquared = summary(all_unadj)$adj.r.squared)

# Adjusted 
adj_all_results <- tidy(all_adjusted, conf.int = TRUE) %>% 
  mutate(Model = "Adjusted",
         AdjRsquared = summary(all_adjusted)$adj.r.squared)


# combine and plot 
multivar_results <- bind_rows(unadj_all_results, adj_all_results) %>% 
  filter(term != "(Intercept)")  %>% # Remove intercept term
  mutate(priority = ifelse(grepl("zv", term), 1, 2)) %>% # assign priority to video metrics 
  arrange(priority) %>% # sort by priority 
  select(-priority) # drop priority colum
  
adj_rsqu_df <- multivar_results[c("Model", "AdjRsquared")] %>% 
  distinct() # remove duplicates 
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 2 x 2
##   Model      AdjRsquared
##   <chr>            <dbl>
## 1 Unadjusted        0.53
## 2 Adjusted          0.59
write.csv(adj_rsqu_df, file.path(output_dir, "t25fw_fw_allMetrics_unadjusted_vs_adjusted_adjR.csv")) 

p <- adj_vs_unadj_regression_plot(multivar_results, 'FW: Log T25FW Multivariate - Unadjusted vs Adjusted', 1)
p

ggsave(file.path(output_dir, "t25fw_fw_allMetrics_unadjusted_vs_adjusted.png"), 
       bg = "white", width= 10, height=10)

Home Videos –> T25fw

Home Univariate models with disease info and demographics –> Log T25FW

uni1 <- lm(t25fw_log ~ demoEHR_Age, data = home_df)
hist(resid(uni1)) 

uni2 <- lm(t25fw_log ~ demoEHR_DiseaseDuration, data = home_df)
hist(resid(uni2)) 

uni3 <- lm(t25fw_log ~ ms_dx_condensed, data = home_df)
hist(resid(uni3)) 

uni4 <- lm(t25fw_log ~ race_ethnicity_clean, data = home_df)
hist(resid(uni4)) 

uni5 <- lm(t25fw_log ~ clean_sex, data = home_df)
hist(resid(uni4)) 

## Extract regression results from each model 
results <- bind_rows(
  tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "demoEHR_Age", 
                                         AdjRsquared = summary(uni1)$adj.r.squared),
  tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "demoEHR_DiseaseDuration", 
                                         AdjRsquared = summary(uni2)$adj.r.squared),
  tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "ms_dx_condensed", 
                                         AdjRsquared = summary(uni3)$adj.r.squared),
  tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "race_ethnicity_clean", 
                                         AdjRsquared = summary(uni4)$adj.r.squared),
  tidy(uni5, conf.int = TRUE) %>% mutate(Variable = "clean_sex", 
                                         AdjRsquared = summary(uni5)$adj.r.squared)) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

adj_rsqu_df <- results[c("Variable", "AdjRsquared")] %>% 
  distinct() # remove duplicates 
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 5 x 2
##   Variable                AdjRsquared
##   <chr>                         <dbl>
## 1 demoEHR_Age                    0.11
## 2 demoEHR_DiseaseDuration        0.03
## 3 ms_dx_condensed                0.29
## 4 race_ethnicity_clean           0.26
## 5 clean_sex                      0.01
write.csv(adj_rsqu_df, file.path(output_dir, "t25fw_home_demographics_univariate_regression_adjR.csv")) 

# Apply rounding function to p-value columns 
results <- results %>% mutate(pval_text = paste0("p=", sapply(p.value, format_p_value)))  # Format p-values

# Apply formatting function and create significance indicators
results <- results %>%
  mutate(
    pval_text = paste0("p=", sapply(p.value, format_p_value)),
    sig = ifelse(p.value < 0.05, "Significant", "Non-Significant"),  # Create significance group
    alpha_level = ifelse(p.value < 0.05, 1, 0.3)  # More transparency for non-sig points
  )

# Determine x position for p-values (offset to the right of the max estimate)
x_max <- max(results$conf.high, na.rm = TRUE)  # Get max confidence interval upper bound
results <- results %>% mutate(pval_x_pos = x_max + 1)  # Place p-values slightly to the right of max x range

# Plot 
title <- "Home: Log T25FW ~ Demographic/Disease Info"
p <- univariate_regression_plot(results, title, 1)
p

ggsave(file.path(output_dir, "t25fw_home_demographics_univariate_regression_estimates.png"), 
       bg = "white", width= 10, height=10)

Metric univariate models - Home video metrics

### Univariate video metrics 
uni1 <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_hv, data = home_df)
ggplot(data = home_df, aes(x = log_delta_pix_h_rel_median_pose_hv, y = t25fw_log)) + geom_point()
## Warning: Removed 4 rows containing missing values (`geom_point()`).

hist(resid(uni1))

uni2 <- lm(t25fw_log ~ stride_time_median_sec_pose_hv, data = home_df)
ggplot(data = home_df, aes(x = stride_time_median_sec_pose_hv, y = t25fw_log)) + geom_point()
## Warning: Removed 13 rows containing missing values (`geom_point()`).

hist(resid(uni2))

uni3 <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_hv, data = home_df)
ggplot(data = home_df, aes(x = mean_cadence_step_per_min_pose_hv, y = t25fw_log)) + geom_point()
## Warning: Removed 12 rows containing missing values (`geom_point()`).

hist(resid(uni3))

uni4 <- lm(t25fw_log ~ stride_width_median_cm_pose_hv, data = home_df)
ggplot(data = home_df, aes(x = stride_width_median_cm_pose_hv, y = t25fw_log)) + geom_point()
## Warning: Removed 12 rows containing missing values (`geom_point()`).

hist(resid(uni4))

uni_results <- bind_rows(
  tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv",
                                         AdjRsquared = summary(uni1)$adj.r.squared),
  tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv",
                                         AdjRsquared = summary(uni2)$adj.r.squared),
  tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv",
                                         AdjRsquared = summary(uni3)$adj.r.squared),
  tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv", 
                                         AdjRsquared = summary(uni4)$adj.r.squared)) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

uni_results <- uni_results %>% mutate(Model = "Unadjusted")

adj_rsqu_df <- uni_results[c("Variable", "AdjRsquared")] %>% 
  distinct() # remove duplicates 
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 4 x 2
##   Variable                           AdjRsquared
##   <chr>                                    <dbl>
## 1 log_delta_pix_h_rel_median_pose_hv        0.15
## 2 stride_time_median_sec_pose_hv            0.44
## 3 mean_cadence_step_per_min_pose_hv         0.19
## 4 stride_width_median_cm_pose_hv            0.33
write.csv(adj_rsqu_df, file.path(output_dir, "t25fw_home_metrics_univariate_regression_adjR.csv")) 

p <- univariate_regression_plot(uni_results, 'Home: Log T25FW ~ Metric', 1)
p

ggsave(file.path(output_dir, "t25fw_home_metrics_univariate_regression_estimates.png"), 
       bg = "white", width= 10, height=10)

Single Metric, adjust 1 confounder - PWS video metrics

## age ---------------------------------------------------
con1_age <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_hv + demoEHR_Age, data = home_df)
hist(resid(con1_age))

con2_age <- lm(t25fw_log ~ stride_time_median_sec_pose_hv + demoEHR_Age, data = home_df)
hist(resid(con2_age))

con3_age <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_hv + demoEHR_Age, data = home_df)
hist(resid(con3_age))

con4_age <- lm(t25fw_log ~ stride_width_median_cm_pose_hv + demoEHR_Age, data = home_df)
hist(resid(con4_age))

con_age_results <- bind_rows(
  tidy(con1_age, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
  tidy(con2_age, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
  tidy(con3_age, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
  tidy(con4_age, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("demoEHR_Age")  # Define confounders to remove

con_age_results <- con_age_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_age_results
## # A tibble: 4 x 9
##   term    estimate std.error statistic p.value conf.low conf.high Variable Model
##   <chr>      <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_de~ -0.283     0.0783      -3.62 6.29e-4  -0.440   -0.126   log_del~ Adju~
## 2 stride~  1.01      0.136        7.40 1.59e-9   0.734    1.28    stride_~ Adju~
## 3 mean_c~ -0.00893   0.00234     -3.82 3.71e-4  -0.0136  -0.00424 mean_ca~ Adju~
## 4 stride~  0.0644    0.0154       4.19 1.13e-4   0.0335   0.0953  stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_age <- bind_rows(uni_results, con_age_results)
results_age
## # A tibble: 8 x 10
##   term          estimate std.error statistic p.value conf.low conf.high Variable
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_pi~ -0.286     0.0853      -3.35 1.43e-3  -0.456   -0.115   log_del~
## 2 stride_time_~  1.04      0.161        6.45 4.41e-8   0.716    1.36    stride_~
## 3 mean_cadence~ -0.00940   0.00260     -3.62 6.69e-4  -0.0146  -0.00419 mean_ca~
## 4 stride_width~  0.0763    0.0148       5.14 4.41e-6   0.0465   0.106   stride_~
## 5 log_delta_pi~ -0.283     0.0783      -3.62 6.29e-4  -0.440   -0.126   log_del~
## 6 stride_time_~  1.01      0.136        7.40 1.59e-9   0.734    1.28    stride_~
## 7 mean_cadence~ -0.00893   0.00234     -3.82 3.71e-4  -0.0136  -0.00424 mean_ca~
## 8 stride_width~  0.0644    0.0154       4.19 1.13e-4   0.0335   0.0953  stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_age, 'Home: Log T25FW ~ Metric + demoEHR_Age', 1)
p

ggsave(file.path(output_dir, "t25fw_home_metric_regression_adj_age.png"), 
       bg = "white", width= 10, height=10)
## disease duration -----------------------------------------------------
con1_dur <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_hv + demoEHR_DiseaseDuration, data = home_df)
hist(resid(con1_dur))

con2_dur <- lm(t25fw_log ~ stride_time_median_sec_pose_hv + demoEHR_DiseaseDuration, data = home_df)
hist(resid(con2_dur))

con3_dur <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_hv + demoEHR_DiseaseDuration, data = home_df)
hist(resid(con3_dur))

con4_dur <- lm(t25fw_log ~ stride_width_median_cm_pose_hv + demoEHR_DiseaseDuration, data = home_df)
hist(resid(con4_dur))

con_dur_results <- bind_rows(
  tidy(con1_dur, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
  tidy(con2_dur, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
  tidy(con3_dur, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
  tidy(con4_dur, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("demoEHR_DiseaseDuration")  # Define confounders to remove

con_dur_results
## # A tibble: 8 x 8
##   term          estimate std.error statistic p.value conf.low conf.high Variable
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_pi~ -2.86e-1   0.0845    -3.38   1.29e-3 -0.455    -0.117   log_del~
## 2 demoEHR_Dise~  8.36e-3   0.00560    1.49   1.41e-1 -0.00285   0.0196  log_del~
## 3 stride_time_~  1.13e+0   0.155      7.30   2.32e-9  0.819     1.44    stride_~
## 4 demoEHR_Dise~  1.29e-2   0.00466    2.77   7.83e-3  0.00356   0.0223  stride_~
## 5 mean_cadence~ -9.91e-3   0.00258   -3.85   3.39e-4 -0.0151   -0.00474 mean_ca~
## 6 demoEHR_Dise~  9.21e-3   0.00575    1.60   1.16e-1 -0.00235   0.0208  mean_ca~
## 7 stride_width~  7.61e-2   0.0154     4.94   9.13e-6  0.0452    0.107   stride_~
## 8 demoEHR_Dise~  2.53e-4   0.00548    0.0463 9.63e-1 -0.0107    0.0113  stride_~
con_dur_results <- con_dur_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_dur_results
## # A tibble: 4 x 9
##   term    estimate std.error statistic p.value conf.low conf.high Variable Model
##   <chr>      <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_de~ -0.286     0.0845      -3.38 1.29e-3  -0.455   -0.117   log_del~ Adju~
## 2 stride~  1.13      0.155        7.30 2.32e-9   0.819    1.44    stride_~ Adju~
## 3 mean_c~ -0.00991   0.00258     -3.85 3.39e-4  -0.0151  -0.00474 mean_ca~ Adju~
## 4 stride~  0.0761    0.0154       4.94 9.13e-6   0.0452   0.107   stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_dur <- bind_rows(uni_results, con_dur_results)
results_dur
## # A tibble: 8 x 10
##   term          estimate std.error statistic p.value conf.low conf.high Variable
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_pi~ -0.286     0.0853      -3.35 1.43e-3  -0.456   -0.115   log_del~
## 2 stride_time_~  1.04      0.161        6.45 4.41e-8   0.716    1.36    stride_~
## 3 mean_cadence~ -0.00940   0.00260     -3.62 6.69e-4  -0.0146  -0.00419 mean_ca~
## 4 stride_width~  0.0763    0.0148       5.14 4.41e-6   0.0465   0.106   stride_~
## 5 log_delta_pi~ -0.286     0.0845      -3.38 1.29e-3  -0.455   -0.117   log_del~
## 6 stride_time_~  1.13      0.155        7.30 2.32e-9   0.819    1.44    stride_~
## 7 mean_cadence~ -0.00991   0.00258     -3.85 3.39e-4  -0.0151  -0.00474 mean_ca~
## 8 stride_width~  0.0761    0.0154       4.94 9.13e-6   0.0452   0.107   stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dur, 'Home: Log T25FW ~ Metric + demoEHR_DiseaseDuration', 1)
p

ggsave(file.path(output_dir, "t25fw_home_metric_regression_adj_duration.png"), 
       bg = "white", width= 10, height=10)
## MS DX ---------------------------------------------------------------
con1_dx <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_hv + ms_dx_condensed, data = home_df)
hist(resid(con1_dx))

con2_dx <- lm(t25fw_log ~ stride_time_median_sec_pose_hv + ms_dx_condensed, data = home_df)
hist(resid(con2_dx))

con3_dx <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_hv + ms_dx_condensed, data = home_df)
hist(resid(con3_dx))

con4_dx <- lm(t25fw_log ~ stride_width_median_cm_pose_hv + ms_dx_condensed, data = home_df)
hist(resid(con4_dx))

con_dx_results <- bind_rows(
  tidy(con1_dx, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
  tidy(con2_dx, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
  tidy(con3_dx, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
  tidy(con4_dx, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("ms_dx_condensed")  # Define confounders to remove

con_dx_results
## # A tibble: 12 x 8
##    term         estimate std.error statistic p.value conf.low conf.high Variable
##    <chr>           <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>   
##  1 log_delta_p~ -0.180     0.0747    -2.41   1.92e-2 -0.330    -0.0305  log_del~
##  2 ms_dx_conde~  0.597     0.112      5.35   1.65e-6  0.373     0.820   log_del~
##  3 ms_dx_conde~  0.0583    0.156      0.374  7.10e-1 -0.254     0.371   log_del~
##  4 stride_time~  0.406     0.206      1.97   5.43e-2 -0.00784   0.820   stride_~
##  5 ms_dx_conde~  0.588     0.140      4.19   1.18e-4  0.306     0.870   stride_~
##  6 ms_dx_conde~ -0.00402   0.130     -0.0309 9.75e-1 -0.266     0.258   stride_~
##  7 mean_cadenc~ -0.00269   0.00217   -1.24   2.20e-1 -0.00704   0.00166 mean_ca~
##  8 ms_dx_conde~  0.732     0.110      6.64   2.42e-8  0.511     0.954   mean_ca~
##  9 ms_dx_conde~  0.0180    0.133      0.136  8.92e-1 -0.248     0.284   mean_ca~
## 10 stride_widt~  0.0392    0.0135     2.91   5.48e-3  0.0121    0.0664  stride_~
## 11 ms_dx_conde~  0.633     0.107      5.89   3.47e-7  0.417     0.849   stride_~
## 12 ms_dx_conde~ -0.0779    0.129     -0.601  5.50e-1 -0.338     0.182   stride_~
con_dx_results <- con_dx_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_dx_results
## # A tibble: 4 x 9
##   term    estimate std.error statistic p.value conf.low conf.high Variable Model
##   <chr>      <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_de~ -0.180     0.0747      -2.41 0.0192  -0.330    -0.0305  log_del~ Adju~
## 2 stride~  0.406     0.206        1.97 0.0543  -0.00784   0.820   stride_~ Adju~
## 3 mean_c~ -0.00269   0.00217     -1.24 0.220   -0.00704   0.00166 mean_ca~ Adju~
## 4 stride~  0.0392    0.0135       2.91 0.00548  0.0121    0.0664  stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_dx <- bind_rows(uni_results, con_dx_results)
results_dx
## # A tibble: 8 x 10
##   term          estimate std.error statistic p.value conf.low conf.high Variable
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_pi~ -0.286     0.0853      -3.35 1.43e-3 -0.456    -0.115   log_del~
## 2 stride_time_~  1.04      0.161        6.45 4.41e-8  0.716     1.36    stride_~
## 3 mean_cadence~ -0.00940   0.00260     -3.62 6.69e-4 -0.0146   -0.00419 mean_ca~
## 4 stride_width~  0.0763    0.0148       5.14 4.41e-6  0.0465    0.106   stride_~
## 5 log_delta_pi~ -0.180     0.0747      -2.41 1.92e-2 -0.330    -0.0305  log_del~
## 6 stride_time_~  0.406     0.206        1.97 5.43e-2 -0.00784   0.820   stride_~
## 7 mean_cadence~ -0.00269   0.00217     -1.24 2.20e-1 -0.00704   0.00166 mean_ca~
## 8 stride_width~  0.0392    0.0135       2.91 5.48e-3  0.0121    0.0664  stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dx, 'Home: Log T25FW ~ Metric + MS DX', 1)
p

ggsave(file.path(output_dir, "t25fw_home_metric_regression_adj_MSDX.png"), 
       bg = "white", width= 10, height=10)
## race_ethnicity -----------------------------------------------------
con1_re <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_hv + race_ethnicity_clean, data = home_df)
hist(resid(con1_re))

con2_re <- lm(t25fw_log ~ stride_time_median_sec_pose_hv + race_ethnicity_clean, data = home_df)
hist(resid(con2_re))

con3_re <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_hv + race_ethnicity_clean, data = home_df)
hist(resid(con3_re))

con4_re <- lm(t25fw_log ~ stride_width_median_cm_pose_hv + race_ethnicity_clean, data = home_df)
hist(resid(con4_re))

con_re_results <- bind_rows(
  tidy(con1_re, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
  tidy(con2_re, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
  tidy(con3_re, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
  tidy(con4_re, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("race_ethnicity_clean")  # Define confounders to remove

con_re_results
## # A tibble: 17 x 8
##    term         estimate std.error statistic p.value conf.low conf.high Variable
##    <chr>           <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>   
##  1 log_delta_p~ -0.236     0.0835     -2.83  6.49e-3  -0.404   -0.0690  log_del~
##  2 race_ethnic~ -0.334     0.198      -1.68  9.79e-2  -0.732    0.0635  log_del~
##  3 race_ethnic~  0.850     0.342       2.48  1.62e-2   0.164    1.54    log_del~
##  4 race_ethnic~  0.206     0.240       0.860 3.93e-1  -0.274    0.687   log_del~
##  5 race_ethnic~ -0.225     0.123      -1.83  7.22e-2  -0.470    0.0210  log_del~
##  6 stride_time~  0.987     0.152       6.51  4.54e-8   0.682    1.29    stride_~
##  7 race_ethnic~ -0.400     0.191      -2.10  4.13e-2  -0.784   -0.0165  stride_~
##  8 race_ethnic~  0.220     0.191       1.15  2.56e-1  -0.165    0.604   stride_~
##  9 race_ethnic~ -0.235     0.0980     -2.40  2.07e-2  -0.432   -0.0376  stride_~
## 10 mean_cadenc~ -0.00928   0.00241    -3.85  3.53e-4  -0.0141  -0.00443 mean_ca~
## 11 race_ethnic~ -0.460     0.230      -2.00  5.13e-2  -0.924    0.00279 mean_ca~
## 12 race_ethnic~  0.118     0.230       0.514 6.09e-1  -0.345    0.581   mean_ca~
## 13 race_ethnic~ -0.316     0.117      -2.69  9.69e-3  -0.552   -0.0802  mean_ca~
## 14 stride_widt~  0.0684    0.0149      4.59  3.19e-5   0.0385   0.0984  stride_~
## 15 race_ethnic~ -0.393     0.221      -1.78  8.10e-2  -0.837    0.0504  stride_~
## 16 race_ethnic~  0.0253    0.220       0.115 9.09e-1  -0.418    0.468   stride_~
## 17 race_ethnic~ -0.204     0.114      -1.79  8.03e-2  -0.434    0.0256  stride_~
con_re_results <- con_re_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_re_results
## # A tibble: 4 x 9
##   term    estimate std.error statistic p.value conf.low conf.high Variable Model
##   <chr>      <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_de~ -0.236     0.0835      -2.83 6.49e-3  -0.404   -0.0690  log_del~ Adju~
## 2 stride~  0.987     0.152        6.51 4.54e-8   0.682    1.29    stride_~ Adju~
## 3 mean_c~ -0.00928   0.00241     -3.85 3.53e-4  -0.0141  -0.00443 mean_ca~ Adju~
## 4 stride~  0.0684    0.0149       4.59 3.19e-5   0.0385   0.0984  stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_re <- bind_rows(uni_results, con_re_results)
results_re
## # A tibble: 8 x 10
##   term          estimate std.error statistic p.value conf.low conf.high Variable
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_pi~ -0.286     0.0853      -3.35 1.43e-3  -0.456   -0.115   log_del~
## 2 stride_time_~  1.04      0.161        6.45 4.41e-8   0.716    1.36    stride_~
## 3 mean_cadence~ -0.00940   0.00260     -3.62 6.69e-4  -0.0146  -0.00419 mean_ca~
## 4 stride_width~  0.0763    0.0148       5.14 4.41e-6   0.0465   0.106   stride_~
## 5 log_delta_pi~ -0.236     0.0835      -2.83 6.49e-3  -0.404   -0.0690  log_del~
## 6 stride_time_~  0.987     0.152        6.51 4.54e-8   0.682    1.29    stride_~
## 7 mean_cadence~ -0.00928   0.00241     -3.85 3.53e-4  -0.0141  -0.00443 mean_ca~
## 8 stride_width~  0.0684    0.0149       4.59 3.19e-5   0.0385   0.0984  stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_re, 'Home: Log T25FW ~ Metric + race_ethnicity_clean', 1)
p

ggsave(file.path(output_dir, "t25fw_home_metric_regression_adj_RaceEthnicity.png"), 
       bg = "white", width= 10, height=10)
## sex -------------------------------------------------------------------
con1_sex <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_hv + clean_sex, data = home_df)
hist(resid(con1_sex))

con2_sex <- lm(t25fw_log ~ stride_time_median_sec_pose_hv + clean_sex, data = home_df)
hist(resid(con2_sex))

con3_sex <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_hv + clean_sex, data = home_df)
hist(resid(con3_sex))

con4_sex <- lm(t25fw_log ~ stride_width_median_cm_pose_hv + clean_sex, data = home_df)
hist(resid(con4_sex))

con_sex_results <- bind_rows(
  tidy(con1_sex, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
  tidy(con2_sex, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
  tidy(con3_sex, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
  tidy(con4_sex, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("clean_sex")  # Define confounders to remove

con_sex_results
## # A tibble: 9 x 8
##   term          estimate std.error statistic p.value conf.low conf.high Variable
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_pi~ -0.268     0.0894     -3.00  3.96e-3  -0.447   -0.0895  log_del~
## 2 clean_sexMale -0.0901    0.125      -0.722 4.73e-1  -0.340    0.160   log_del~
## 3 clean_sexNon~ -0.0753    0.261      -0.288 7.74e-1  -0.598    0.448   log_del~
## 4 stride_time_~  1.03      0.162       6.34  6.87e-8   0.703    1.36    stride_~
## 5 clean_sexMale -0.0901    0.105      -0.862 3.93e-1  -0.300    0.120   stride_~
## 6 mean_cadence~ -0.00924   0.00259    -3.57  8.02e-4  -0.0144  -0.00404 mean_ca~
## 7 clean_sexMale -0.143     0.120      -1.20  2.37e-1  -0.384    0.0970  mean_ca~
## 8 stride_width~  0.0837    0.0143      5.87  3.51e-7   0.0550   0.112   stride_~
## 9 clean_sexMale -0.285     0.105      -2.71  9.16e-3  -0.496   -0.0738  stride_~
con_sex_results <- con_sex_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_sex_results
## # A tibble: 4 x 9
##   term    estimate std.error statistic p.value conf.low conf.high Variable Model
##   <chr>      <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_de~ -0.268     0.0894      -3.00 3.96e-3  -0.447   -0.0895  log_del~ Adju~
## 2 stride~  1.03      0.162        6.34 6.87e-8   0.703    1.36    stride_~ Adju~
## 3 mean_c~ -0.00924   0.00259     -3.57 8.02e-4  -0.0144  -0.00404 mean_ca~ Adju~
## 4 stride~  0.0837    0.0143       5.87 3.51e-7   0.0550   0.112   stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_sex <- bind_rows(uni_results, con_sex_results)
results_sex
## # A tibble: 8 x 10
##   term          estimate std.error statistic p.value conf.low conf.high Variable
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_pi~ -0.286     0.0853      -3.35 1.43e-3  -0.456   -0.115   log_del~
## 2 stride_time_~  1.04      0.161        6.45 4.41e-8   0.716    1.36    stride_~
## 3 mean_cadence~ -0.00940   0.00260     -3.62 6.69e-4  -0.0146  -0.00419 mean_ca~
## 4 stride_width~  0.0763    0.0148       5.14 4.41e-6   0.0465   0.106   stride_~
## 5 log_delta_pi~ -0.268     0.0894      -3.00 3.96e-3  -0.447   -0.0895  log_del~
## 6 stride_time_~  1.03      0.162        6.34 6.87e-8   0.703    1.36    stride_~
## 7 mean_cadence~ -0.00924   0.00259     -3.57 8.02e-4  -0.0144  -0.00404 mean_ca~
## 8 stride_width~  0.0837    0.0143       5.87 3.51e-7   0.0550   0.112   stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_sex, 'Home: Log T25FW ~ Metric + Sex', 1)
p

ggsave(file.path(output_dir, "t25fw_home_metric_regression_adj_sex.png"), 
       bg = "white", width= 10, height=10)

Multivariate - Home –> T25fw

Metrics only - not including double support/stance measures, too many missing. May include after improving code

# HOme 

# Unadjusted 
all_unadj <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_hv + 
                  stride_time_median_sec_pose_hv + 
                  mean_cadence_step_per_min_pose_hv + 
                  stride_width_median_cm_pose_hv, 
                data = home_df)

summary(all_unadj)
## 
## Call:
## lm(formula = t25fw_log ~ log_delta_pix_h_rel_median_pose_hv + 
##     stride_time_median_sec_pose_hv + mean_cadence_step_per_min_pose_hv + 
##     stride_width_median_cm_pose_hv, data = home_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39417 -0.17948  0.05974  0.17898  0.46313 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                         0.516539   0.454806   1.136  0.26195   
## log_delta_pix_h_rel_median_pose_hv -0.276928   0.118345  -2.340  0.02368 * 
## stride_time_median_sec_pose_hv      0.466013   0.220894   2.110  0.04036 * 
## mean_cadence_step_per_min_pose_hv  -0.003278   0.002541  -1.290  0.20353   
## stride_width_median_cm_pose_hv      0.045762   0.013099   3.494  0.00107 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2361 on 46 degrees of freedom
##   (14 observations deleted due to missingness)
## Multiple R-squared:  0.6514, Adjusted R-squared:  0.6211 
## F-statistic: 21.49 on 4 and 46 DF,  p-value: 4.766e-10
hist(resid(all_unadj))

# Adjusted 
all_adjusted <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_hv + 
                     stride_time_median_sec_pose_hv + 
                     mean_cadence_step_per_min_pose_hv + 
                     stride_width_median_cm_pose_hv + 
                     demoEHR_Age + 
                     demoEHR_DiseaseDuration +
                     ms_dx_condensed + 
                     race_ethnicity_clean + 
                     clean_sex, 
                   data = home_df)

summary(all_adjusted)
## 
## Call:
## lm(formula = t25fw_log ~ log_delta_pix_h_rel_median_pose_hv + 
##     stride_time_median_sec_pose_hv + mean_cadence_step_per_min_pose_hv + 
##     stride_width_median_cm_pose_hv + demoEHR_Age + demoEHR_DiseaseDuration + 
##     ms_dx_condensed + race_ethnicity_clean + clean_sex, data = home_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39260 -0.10178 -0.00946  0.11447  0.34018 
## 
## Coefficients:
##                                             Estimate Std. Error t value
## (Intercept)                                 0.791302   0.431103   1.836
## log_delta_pix_h_rel_median_pose_hv         -0.050404   0.110532  -0.456
## stride_time_median_sec_pose_hv              0.373112   0.212162   1.759
## mean_cadence_step_per_min_pose_hv          -0.002326   0.002032  -1.144
## stride_width_median_cm_pose_hv              0.037122   0.012851   2.889
## demoEHR_Age                                 0.001887   0.002976   0.634
## demoEHR_DiseaseDuration                     0.006831   0.004522   1.510
## ms_dx_condensedProgressive MS               0.298683   0.131944   2.264
## ms_dx_condensedMS, Subtype Not Specified   -0.168827   0.122111  -1.383
## race_ethnicity_cleanAsian                  -0.323931   0.144318  -2.245
## race_ethnicity_cleanHispanic or Latino     -0.026557   0.154113  -0.172
## race_ethnicity_cleanOther/Unknown/Declined -0.233602   0.087876  -2.658
## clean_sexMale                              -0.237104   0.093736  -2.529
##                                            Pr(>|t|)   
## (Intercept)                                 0.07426 . 
## log_delta_pix_h_rel_median_pose_hv          0.65098   
## stride_time_median_sec_pose_hv              0.08669 . 
## mean_cadence_step_per_min_pose_hv           0.25964   
## stride_width_median_cm_pose_hv              0.00636 **
## demoEHR_Age                                 0.52982   
## demoEHR_DiseaseDuration                     0.13922   
## ms_dx_condensedProgressive MS               0.02938 * 
## ms_dx_condensedMS, Subtype Not Specified    0.17487   
## race_ethnicity_cleanAsian                   0.03070 * 
## race_ethnicity_cleanHispanic or Latino      0.86410   
## race_ethnicity_cleanOther/Unknown/Declined  0.01143 * 
## clean_sexMale                               0.01569 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1829 on 38 degrees of freedom
##   (14 observations deleted due to missingness)
## Multiple R-squared:  0.8271, Adjusted R-squared:  0.7726 
## F-statistic: 15.15 on 12 and 38 DF,  p-value: 5.692e-11
hist(resid(all_adjusted))

# format for plotting 
# Unadjusted 
unadj_all_results <- tidy(all_unadj, conf.int = TRUE) %>% 
  mutate(Model = "Unadjusted",
         AdjRsquared = summary(all_unadj)$adj.r.squared)

# Adjusted 
adj_all_results <- tidy(all_adjusted, conf.int = TRUE) %>% 
  mutate(Model = "Adjusted",
         AdjRsquared = summary(all_adjusted)$adj.r.squared)

# combine and plot 
multivar_results <- bind_rows(unadj_all_results, adj_all_results) %>% 
  filter(term != "(Intercept)")  
  
adj_rsqu_df <- multivar_results[c("Model", "AdjRsquared")]  %>% 
  distinct() # remove duplicates 
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 2 x 2
##   Model      AdjRsquared
##   <chr>            <dbl>
## 1 Unadjusted        0.62
## 2 Adjusted          0.77
write.csv(adj_rsqu_df, file.path(output_dir, "t25fw_home_allMetrics_unadjusted_vs_adjusted_adjR.csv")) 


p <- adj_vs_unadj_regression_plot(multivar_results, 'Home: Log T25FW Multivariate - Unadjusted vs Adjusted', 1)
p

ggsave(file.path(output_dir, "t25fw_home_allMetrics_unadjusted_vs_adjusted.png"), 
       bg = "white", width= 10, height=10)

PWS Velocity Zeno Velocity Linear Regression

ggplot(data = zeno_pws_df, aes(PWS_velocitycmsecmean)) + 
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

home_df$log_PWS_velocitycmsecmean <- log(home_df$PWS_velocitycmsecmean)
ggplot(data = home_df, aes(PWS_velocitycmsecmean)) + 
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

PWS Zeno Videos –> PWS Velocity Zeno Velocity

Demographics univariate models

# PWS ------------------------------------------
uni1 <- lm(PWS_velocitycmsecmean ~ demoEHR_Age, data = zeno_pws_df)
hist(resid(uni1)) 

uni2 <- lm(PWS_velocitycmsecmean ~ demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(uni2)) 

uni3 <- lm(PWS_velocitycmsecmean ~ ms_dx_condensed, data = zeno_pws_df)
hist(resid(uni3)) 

uni4 <- lm(PWS_velocitycmsecmean ~ race_ethnicity_clean, data = zeno_pws_df)
hist(resid(uni4)) 

uni5 <- lm(PWS_velocitycmsecmean ~ clean_sex, data = zeno_pws_df)
hist(resid(uni4)) 

## Extract regression results from each model 
results <- bind_rows(
  tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "demoEHR_Age", 
                                         AdjRsquared = summary(uni1)$adj.r.squared),
  tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "demoEHR_DiseaseDuration", 
                                         AdjRsquared = summary(uni2)$adj.r.squared),
  tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "ms_dx_condensed", 
                                         AdjRsquared = summary(uni3)$adj.r.squared),
  tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "race_ethnicity_clean", 
                                         AdjRsquared = summary(uni4)$adj.r.squared),
  tidy(uni5, conf.int = TRUE) %>% mutate(Variable = "clean_sex", 
                                         AdjRsquared = summary(uni5)$adj.r.squared)) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

adj_rsqu_df <- results[c("Variable","AdjRsquared")] %>% 
  distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 5 x 2
##   Variable                AdjRsquared
##   <chr>                         <dbl>
## 1 demoEHR_Age                    0.05
## 2 demoEHR_DiseaseDuration        0.01
## 3 ms_dx_condensed                0.2 
## 4 race_ethnicity_clean           0.03
## 5 clean_sex                     -0.01
write.csv(adj_rsqu_df, file.path(output_dir, "PWSvel_pws_demographics_univariate_regression_adjR.csv")) 

# Plot 
title <- "PWS: PWS_velocitycmsecmean ~ Demographic/Disease Info"
p <- univariate_regression_plot(results, title, 5)
p

ggsave(file.path(output_dir, "PWSvel_pws_demographics_univariate_regression_estimates.png"), 
       bg = "white", width= 10, height=10)

Metric univariate models - PWS video metrics

### Univariate video metrics 
uni1 <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv, data = zeno_pws_df)
ggplot(data = zeno_pws_df, aes(x = log_delta_pix_h_rel_median_pose_zv, y = PWS_velocitycmsecmean)) + geom_point()
## Warning: Removed 16 rows containing missing values (`geom_point()`).

summary(uni1)
## 
## Call:
## lm(formula = PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv, 
##     data = zeno_pws_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -61.352 -20.446   1.464  15.787  83.677 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         145.625      5.307  27.439  < 2e-16 ***
## log_delta_pix_h_rel_median_pose_zv   29.832      3.506   8.509 3.62e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.3 on 206 degrees of freedom
##   (16 observations deleted due to missingness)
## Multiple R-squared:  0.2601, Adjusted R-squared:  0.2565 
## F-statistic:  72.4 on 1 and 206 DF,  p-value: 3.621e-15
hist(resid(uni1))

uni2 <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_zv, data = zeno_pws_df)
ggplot(data = zeno_pws_df, aes(x = stride_time_median_sec_pose_zv, y = PWS_velocitycmsecmean)) + geom_point()
## Warning: Removed 48 rows containing missing values (`geom_point()`).

hist(resid(uni2))

uni3 <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv, data = zeno_pws_df)
ggplot(data = zeno_pws_df, aes(x = mean_cadence_step_per_min_pose_zv, y = PWS_velocitycmsecmean)) + geom_point()
## Warning: Removed 40 rows containing missing values (`geom_point()`).

hist(resid(uni3))

uni4 <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_zv, data = zeno_pws_df)
ggplot(data = zeno_pws_df, aes(x = stride_width_median_cm_pose_zv, y = PWS_velocitycmsecmean)) + geom_point()
## Warning: Removed 40 rows containing missing values (`geom_point()`).

hist(resid(uni4))

uni_results <- bind_rows(
  tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv",
                                         AdjRsquared = summary(uni1)$adj.r.squared),
  tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv",
                                         AdjRsquared = summary(uni2)$adj.r.squared),
  tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv",
                                         AdjRsquared = summary(uni3)$adj.r.squared),
  tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv",
                                         AdjRsquared = summary(uni4)$adj.r.squared)) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

uni_results <- uni_results %>% mutate(Model = "Unadjusted")

adj_rsqu_df <- uni_results[c("Variable", "AdjRsquared")] %>% 
  distinct() # remove duplicates 
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 4 x 2
##   Variable                           AdjRsquared
##   <chr>                                    <dbl>
## 1 log_delta_pix_h_rel_median_pose_zv        0.26
## 2 stride_time_median_sec_pose_zv            0.23
## 3 mean_cadence_step_per_min_pose_zv         0.29
## 4 stride_width_median_cm_pose_zv            0.06
write.csv(adj_rsqu_df, file.path(output_dir, "PWSvel_pws_metrics_univariate_regression_adjR.csv")) 

p <- univariate_regression_plot(uni_results, 'PWS: PWS_velocitycmsecmean ~ Metric', 35)
p

ggsave(file.path(output_dir, "PWSvel_pws_metrics_univariate_regression_estimates.png"), 
       bg = "white", width= 10, height=10)

Single Metric, adjust 1 confounder - PWS video metrics

## age ---------------------------------------------------
con1_age <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + demoEHR_Age, data = zeno_pws_df)
hist(resid(con1_age))

con2_age <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_zv + demoEHR_Age, data = zeno_pws_df)
hist(resid(con2_age))

con3_age <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + demoEHR_Age, data = zeno_pws_df)
hist(resid(con3_age))

con4_age <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_zv + demoEHR_Age, data = zeno_pws_df)
hist(resid(con4_age))

con_age_results <- bind_rows(
  tidy(con1_age, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_age, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_age, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_age, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("demoEHR_Age")  # Define confounders to remove

con_age_results <- con_age_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_age_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~   28.5       3.47       8.22 2.23e-14   21.7      35.4   log_del~ Adju~
## 2 strid~  -84.3      11.2       -7.50 3.12e-12 -106.      -62.1   stride_~ Adju~
## 3 mean_~    0.997     0.113      8.79 1.15e-15    0.773     1.22  mean_ca~ Adju~
## 4 strid~   -1.94      0.584     -3.32 1.09e- 3   -3.09     -0.787 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_age <- bind_rows(uni_results, con_age_results)
results_age
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~   29.8       3.51       8.51 3.62e-15   22.9      36.7   log_del~
## 2 stride_time~  -83.7      11.4       -7.36 7.04e-12 -106.      -61.2   stride_~
## 3 mean_cadenc~    0.997     0.116      8.60 3.56e-15    0.769     1.23  mean_ca~
## 4 stride_widt~   -2.06      0.589     -3.50 5.91e- 4   -3.22     -0.897 stride_~
## 5 log_delta_p~   28.5       3.47       8.22 2.23e-14   21.7      35.4   log_del~
## 6 stride_time~  -84.3      11.2       -7.50 3.12e-12 -106.      -62.1   stride_~
## 7 mean_cadenc~    0.997     0.113      8.79 1.15e-15    0.773     1.22  mean_ca~
## 8 stride_widt~   -1.94      0.584     -3.32 1.09e- 3   -3.09     -0.787 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_age, 'PWS: PWS_velocitycmsecmean~ Metric + demoEHR_Age', 35)
p

ggsave(file.path(output_dir, "PWSvel_pws_metric_regression_adj_age.png"), 
       bg = "white", width= 10, height=10)
## disease duration -----------------------------------------------------
con1_dur <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(con1_dur))

con2_dur <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_zv + demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(con2_dur))

con3_dur <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(con3_dur))

con4_dur <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_zv + demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(con4_dur))

con_dur_results <- bind_rows(
  tidy(con1_dur, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_dur, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_dur, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_dur, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("demoEHR_DiseaseDuration")  # Define confounders to remove

con_dur_results
## # A tibble: 8 x 8
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~   29.6       3.51       8.43 5.94e-15   22.7     36.5    log_del~
## 2 demoEHR_Dis~   -0.297     0.225     -1.32 1.88e- 1   -0.740    0.146  log_del~
## 3 stride_time~  -85.0      11.3       -7.53 2.72e-12 -107.     -62.7    stride_~
## 4 demoEHR_Dis~   -0.429     0.211     -2.04 4.32e- 2   -0.846   -0.0133 stride_~
## 5 mean_cadenc~    1.02      0.115      8.90 5.78e-16    0.794    1.25   mean_ca~
## 6 demoEHR_Dis~   -0.515     0.207     -2.49 1.36e- 2   -0.923   -0.107  mean_ca~
## 7 stride_widt~   -2.01      0.589     -3.41 8.09e- 4   -3.17    -0.844  stride_~
## 8 demoEHR_Dis~   -0.310     0.240     -1.29 1.98e- 1   -0.784    0.163  stride_~
con_dur_results <- con_dur_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_dur_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~    29.6      3.51       8.43 5.94e-15   22.7      36.5   log_del~ Adju~
## 2 strid~   -85.0     11.3       -7.53 2.72e-12 -107.      -62.7   stride_~ Adju~
## 3 mean_~     1.02     0.115      8.90 5.78e-16    0.794     1.25  mean_ca~ Adju~
## 4 strid~    -2.01     0.589     -3.41 8.09e- 4   -3.17     -0.844 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_dur <- bind_rows(uni_results, con_dur_results)
results_dur
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~   29.8       3.51       8.51 3.62e-15   22.9      36.7   log_del~
## 2 stride_time~  -83.7      11.4       -7.36 7.04e-12 -106.      -61.2   stride_~
## 3 mean_cadenc~    0.997     0.116      8.60 3.56e-15    0.769     1.23  mean_ca~
## 4 stride_widt~   -2.06      0.589     -3.50 5.91e- 4   -3.22     -0.897 stride_~
## 5 log_delta_p~   29.6       3.51       8.43 5.94e-15   22.7      36.5   log_del~
## 6 stride_time~  -85.0      11.3       -7.53 2.72e-12 -107.      -62.7   stride_~
## 7 mean_cadenc~    1.02      0.115      8.90 5.78e-16    0.794     1.25  mean_ca~
## 8 stride_widt~   -2.01      0.589     -3.41 8.09e- 4   -3.17     -0.844 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dur, 'PWS: PWS_velocitycmsecmean~ Metric + demoEHR_DiseaseDuration', 35)
p

ggsave(file.path(output_dir, "PWSvel_pws_metric_regression_adj_duration.png"), 
       bg = "white", width= 10, height=10)
## MS DX ---------------------------------------------------------------
con1_dx <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + ms_dx_condensed, data = zeno_pws_df)
hist(resid(con1_dx))

con2_dx <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_zv + ms_dx_condensed, data = zeno_pws_df)
hist(resid(con2_dx))

con3_dx <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + ms_dx_condensed, data = zeno_pws_df)
hist(resid(con3_dx))

con4_dx <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_zv + ms_dx_condensed, data = zeno_pws_df)
hist(resid(con4_dx))

con_dx_results <- bind_rows(
  tidy(con1_dx, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_dx, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_dx, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_dx, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("ms_dx_condensed")  # Define confounders to remove

con_dx_results
## # A tibble: 12 x 8
##    term        estimate std.error statistic  p.value conf.low conf.high Variable
##    <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
##  1 log_delta_~   21.6       3.65      5.92  1.31e- 8   14.4      28.8   log_del~
##  2 ms_dx_cond~  -26.2       4.96     -5.28  3.34e- 7  -35.9     -16.4   log_del~
##  3 ms_dx_cond~    8.01     17.6       0.454 6.50e- 1  -26.8      42.8   log_del~
##  4 stride_tim~  -78.5      11.4      -6.88  1.06e-10 -101.      -56.0   stride_~
##  5 ms_dx_cond~  -15.9       4.88     -3.26  1.36e- 3  -25.5      -6.25  stride_~
##  6 ms_dx_cond~   27.6      13.1       2.10  3.75e- 2    1.62     53.5   stride_~
##  7 mean_caden~    0.876     0.113     7.73  7.37e-13    0.653     1.10  mean_ca~
##  8 ms_dx_cond~  -20.4       4.56     -4.49  1.29e- 5  -29.4     -11.4   mean_ca~
##  9 ms_dx_cond~   14.8      12.9       1.15  2.52e- 1  -10.6      40.3   mean_ca~
## 10 stride_wid~   -1.25      0.579    -2.16  3.18e- 2   -2.39     -0.110 stride_~
## 11 ms_dx_cond~  -25.7       5.26     -4.89  2.27e- 6  -36.1     -15.3   stride_~
## 12 ms_dx_cond~   12.9      14.7       0.879 3.80e- 1  -16.1      41.9   stride_~
con_dx_results <- con_dx_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_dx_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~   21.6       3.65       5.92 1.31e- 8   14.4      28.8   log_del~ Adju~
## 2 strid~  -78.5      11.4       -6.88 1.06e-10 -101.      -56.0   stride_~ Adju~
## 3 mean_~    0.876     0.113      7.73 7.37e-13    0.653     1.10  mean_ca~ Adju~
## 4 strid~   -1.25      0.579     -2.16 3.18e- 2   -2.39     -0.110 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_dx <- bind_rows(uni_results, con_dx_results)
results_dx
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~   29.8       3.51       8.51 3.62e-15   22.9      36.7   log_del~
## 2 stride_time~  -83.7      11.4       -7.36 7.04e-12 -106.      -61.2   stride_~
## 3 mean_cadenc~    0.997     0.116      8.60 3.56e-15    0.769     1.23  mean_ca~
## 4 stride_widt~   -2.06      0.589     -3.50 5.91e- 4   -3.22     -0.897 stride_~
## 5 log_delta_p~   21.6       3.65       5.92 1.31e- 8   14.4      28.8   log_del~
## 6 stride_time~  -78.5      11.4       -6.88 1.06e-10 -101.      -56.0   stride_~
## 7 mean_cadenc~    0.876     0.113      7.73 7.37e-13    0.653     1.10  mean_ca~
## 8 stride_widt~   -1.25      0.579     -2.16 3.18e- 2   -2.39     -0.110 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dx, 'PWS: PWS_velocitycmsecmean~ Metric + MS DX',35)
p

ggsave(file.path(output_dir, "PWSvel_pws_metric_regression_adj_MSDX.png"), 
       bg = "white", width= 10, height=10)
## race_ethnicity -----------------------------------------------------
con1_re <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + race_ethnicity_clean, data = zeno_pws_df)
hist(resid(con1_re))

con2_re <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_zv + race_ethnicity_clean, data = zeno_pws_df)
hist(resid(con2_re))

con3_re <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + race_ethnicity_clean, data = zeno_pws_df)
hist(resid(con3_re))

con4_re <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_zv + race_ethnicity_clean, data = zeno_pws_df)
hist(resid(con4_re))

con_re_results <- bind_rows(
  tidy(con1_re, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_re, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_re, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_re, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("race_ethnicity_clean")  # Define confounders to remove

con_re_results
## # A tibble: 20 x 8
##    term        estimate std.error statistic  p.value conf.low conf.high Variable
##    <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
##  1 log_delta_~   30.2       3.42     8.83   5.00e-16   23.4      36.9   log_del~
##  2 race_ethni~   13.4       6.96     1.92   5.57e- 2   -0.327    27.1   log_del~
##  3 race_ethni~  -22.4       7.17    -3.12   2.06e- 3  -36.5      -8.25  log_del~
##  4 race_ethni~    0.140     6.25     0.0224 9.82e- 1  -12.2      12.5   log_del~
##  5 race_ethni~    4.85      6.57     0.739  4.61e- 1   -8.10     17.8   log_del~
##  6 stride_tim~  -80.5      11.2     -7.19   1.92e-11 -103.      -58.4   stride_~
##  7 race_ethni~    2.22      6.83     0.324  7.46e- 1  -11.3      15.7   stride_~
##  8 race_ethni~  -21.3       6.86    -3.10   2.26e- 3  -34.8      -7.73  stride_~
##  9 race_ethni~    5.90      5.70     1.04   3.02e- 1   -5.35     17.2   stride_~
## 10 race_ethni~   -0.897     7.82    -0.115  9.09e- 1  -16.3      14.5   stride_~
## 11 mean_caden~    1.01      0.115    8.83   9.70e-16    0.788     1.24  mean_ca~
## 12 race_ethni~    0.327     6.85     0.0477 9.62e- 1  -13.2      13.9   mean_ca~
## 13 race_ethni~  -23.9       6.85    -3.50   5.98e- 4  -37.4     -10.4   mean_ca~
## 14 race_ethni~    0.543     5.57     0.0974 9.23e- 1  -10.5      11.5   mean_ca~
## 15 race_ethni~    7.32      7.56     0.968  3.35e- 1   -7.61     22.2   mean_ca~
## 16 stride_wid~   -1.89      0.593   -3.19   1.70e- 3   -3.06     -0.719 stride_~
## 17 race_ethni~    5.37      8.02     0.669  5.04e- 1  -10.5      21.2   stride_~
## 18 race_ethni~  -20.9       8.07    -2.59   1.05e- 2  -36.8      -4.94  stride_~
## 19 race_ethni~    0.876     6.51     0.135  8.93e- 1  -12.0      13.7   stride_~
## 20 race_ethni~   -5.80      8.70    -0.667  5.06e- 1  -23.0      11.4   stride_~
con_re_results <- con_re_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_re_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~    30.2      3.42       8.83 5.00e-16   23.4      36.9   log_del~ Adju~
## 2 strid~   -80.5     11.2       -7.19 1.92e-11 -103.      -58.4   stride_~ Adju~
## 3 mean_~     1.01     0.115      8.83 9.70e-16    0.788     1.24  mean_ca~ Adju~
## 4 strid~    -1.89     0.593     -3.19 1.70e- 3   -3.06     -0.719 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_re <- bind_rows(uni_results, con_re_results)
results_re
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~   29.8       3.51       8.51 3.62e-15   22.9      36.7   log_del~
## 2 stride_time~  -83.7      11.4       -7.36 7.04e-12 -106.      -61.2   stride_~
## 3 mean_cadenc~    0.997     0.116      8.60 3.56e-15    0.769     1.23  mean_ca~
## 4 stride_widt~   -2.06      0.589     -3.50 5.91e- 4   -3.22     -0.897 stride_~
## 5 log_delta_p~   30.2       3.42       8.83 5.00e-16   23.4      36.9   log_del~
## 6 stride_time~  -80.5      11.2       -7.19 1.92e-11 -103.      -58.4   stride_~
## 7 mean_cadenc~    1.01      0.115      8.83 9.70e-16    0.788     1.24  mean_ca~
## 8 stride_widt~   -1.89      0.593     -3.19 1.70e- 3   -3.06     -0.719 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_re, 'PWS: PWS_velocitycmsecmean~ Metric + race_ethnicity_clean', 35)
p

ggsave(file.path(output_dir, "PWSvel_pws_metric_regression_adj_RaceEthnicity.png"), 
       bg = "white", width= 10, height=10)
## sex -------------------------------------------------------------------
con1_sex <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + clean_sex, data = zeno_pws_df)
hist(resid(con1_sex))

con2_sex <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_zv + clean_sex, data = zeno_pws_df)
hist(resid(con2_sex))

con3_sex <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + clean_sex, data = zeno_pws_df)
hist(resid(con3_sex))

con4_sex <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_zv + clean_sex, data = zeno_pws_df)
hist(resid(con4_sex))

con_sex_results <- bind_rows(
  tidy(con1_sex, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_sex, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_sex, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_sex, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("clean_sex")  # Define confounders to remove

con_sex_results
## # A tibble: 12 x 8
##    term        estimate std.error statistic  p.value conf.low conf.high Variable
##    <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
##  1 log_delta_~   29.9       3.53     8.46   5.24e-15   22.9      36.9   log_del~
##  2 clean_sexM~   -1.25      4.36    -0.288  7.74e- 1   -9.85      7.34  log_del~
##  3 clean_sexN~    0.306    26.5      0.0115 9.91e- 1  -52.0      52.6   log_del~
##  4 stride_tim~  -84.1      11.4     -7.36   7.12e-12 -107.      -61.6   stride_~
##  5 clean_sexM~    3.53      4.02     0.878  3.81e- 1   -4.40     11.5   stride_~
##  6 clean_sexN~    8.58     23.3      0.369  7.13e- 1  -37.4      54.5   stride_~
##  7 mean_caden~    1.01      0.117    8.68   2.36e-15    0.781     1.24  mean_ca~
##  8 clean_sexM~    5.57      3.99     1.40   1.64e- 1   -2.30     13.4   mean_ca~
##  9 clean_sexN~    4.92     23.4      0.210  8.34e- 1  -41.3      51.1   mean_ca~
## 10 stride_wid~   -2.10      0.592   -3.54   5.02e- 4   -3.27     -0.930 stride_~
## 11 clean_sexM~    3.70      4.59     0.807  4.21e- 1   -5.35     12.8   stride_~
## 12 clean_sexN~   14.5      26.9      0.536  5.92e- 1  -38.7      67.6   stride_~
con_sex_results <- con_sex_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_sex_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~    29.9      3.53       8.46 5.24e-15   22.9      36.9   log_del~ Adju~
## 2 strid~   -84.1     11.4       -7.36 7.12e-12 -107.      -61.6   stride_~ Adju~
## 3 mean_~     1.01     0.117      8.68 2.36e-15    0.781     1.24  mean_ca~ Adju~
## 4 strid~    -2.10     0.592     -3.54 5.02e- 4   -3.27     -0.930 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_sex <- bind_rows(uni_results, con_sex_results)
results_sex
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~   29.8       3.51       8.51 3.62e-15   22.9      36.7   log_del~
## 2 stride_time~  -83.7      11.4       -7.36 7.04e-12 -106.      -61.2   stride_~
## 3 mean_cadenc~    0.997     0.116      8.60 3.56e-15    0.769     1.23  mean_ca~
## 4 stride_widt~   -2.06      0.589     -3.50 5.91e- 4   -3.22     -0.897 stride_~
## 5 log_delta_p~   29.9       3.53       8.46 5.24e-15   22.9      36.9   log_del~
## 6 stride_time~  -84.1      11.4       -7.36 7.12e-12 -107.      -61.6   stride_~
## 7 mean_cadenc~    1.01      0.117      8.68 2.36e-15    0.781     1.24  mean_ca~
## 8 stride_widt~   -2.10      0.592     -3.54 5.02e- 4   -3.27     -0.930 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_sex, 'PWS: PWS_velocitycmsecmean~ Metric + Sex', 35)
p

ggsave(file.path(output_dir, "PWSvel_pws_metric_regression_adj_sex.png"), 
       bg = "white", width= 10, height=10)

Multivariate - PWS

Metrics only - not including double support/stance measures, too many missing. May include after improving code

# PWS 

# Unadjusted 
all_unadj <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + 
                  stride_time_median_sec_pose_zv + 
                  mean_cadence_step_per_min_pose_zv + 
                  stride_width_median_cm_pose_zv, 
                data = zeno_pws_df)

summary(all_unadj)
## 
## Call:
## lm(formula = PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + 
##     stride_time_median_sec_pose_zv + mean_cadence_step_per_min_pose_zv + 
##     stride_width_median_cm_pose_zv, data = zeno_pws_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -96.874 -13.154   0.782  16.072  39.382 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        111.5979    35.5240   3.141 0.002009 ** 
## log_delta_pix_h_rel_median_pose_zv   5.4194     3.8686   1.401 0.163234    
## stride_time_median_sec_pose_zv     -44.1353    16.2467  -2.717 0.007336 ** 
## mean_cadence_step_per_min_pose_zv    0.6453     0.1876   3.440 0.000746 ***
## stride_width_median_cm_pose_zv      -1.4279     0.5475  -2.608 0.009989 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.68 on 157 degrees of freedom
##   (62 observations deleted due to missingness)
## Multiple R-squared:  0.3772, Adjusted R-squared:  0.3613 
## F-statistic: 23.77 on 4 and 157 DF,  p-value: 2.207e-15
hist(resid(all_unadj))

# Adjusted 
all_adjusted <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + 
                     stride_time_median_sec_pose_zv + 
                     mean_cadence_step_per_min_pose_zv + 
                     stride_width_median_cm_pose_zv + 
                     demoEHR_Age + 
                     demoEHR_DiseaseDuration +
                     ms_dx_condensed + 
                     race_ethnicity_clean + 
                     clean_sex, 
                   data = zeno_pws_df)

summary(all_adjusted)
## 
## Call:
## lm(formula = PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + 
##     stride_time_median_sec_pose_zv + mean_cadence_step_per_min_pose_zv + 
##     stride_width_median_cm_pose_zv + demoEHR_Age + demoEHR_DiseaseDuration + 
##     ms_dx_condensed + race_ethnicity_clean + clean_sex, data = zeno_pws_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -92.112 -13.131   2.296  12.986  40.541 
## 
## Coefficients:
##                                               Estimate Std. Error t value
## (Intercept)                                    96.5168    36.4630   2.647
## log_delta_pix_h_rel_median_pose_zv              3.4012     3.8527   0.883
## stride_time_median_sec_pose_zv                -35.3353    15.9830  -2.211
## mean_cadence_step_per_min_pose_zv               0.7379     0.1854   3.979
## stride_width_median_cm_pose_zv                 -0.6654     0.5639  -1.180
## demoEHR_Age                                    -0.2455     0.1722  -1.426
## demoEHR_DiseaseDuration                        -0.2610     0.2401  -1.087
## ms_dx_condensedProgressive MS                 -11.0117     5.5688  -1.977
## ms_dx_condensedMS, Subtype Not Specified       13.1157    15.0333   0.872
## race_ethnicity_cleanAsian                      -0.5339     7.1708  -0.074
## race_ethnicity_cleanBlack Or African American -21.6522     6.4552  -3.354
## race_ethnicity_cleanHispanic or Latino          1.3116     5.7340   0.229
## race_ethnicity_cleanOther/Unknown/Declined     -0.8366     7.6437  -0.109
## clean_sexMale                                   4.1575     3.9663   1.048
## clean_sexNon-Binary                            -1.9911    20.8628  -0.095
##                                               Pr(>|t|)    
## (Intercept)                                   0.009007 ** 
## log_delta_pix_h_rel_median_pose_zv            0.378786    
## stride_time_median_sec_pose_zv                0.028594 *  
## mean_cadence_step_per_min_pose_zv             0.000108 ***
## stride_width_median_cm_pose_zv                0.239905    
## demoEHR_Age                                   0.156094    
## demoEHR_DiseaseDuration                       0.278775    
## ms_dx_condensedProgressive MS                 0.049869 *  
## ms_dx_condensedMS, Subtype Not Specified      0.384391    
## race_ethnicity_cleanAsian                     0.940754    
## race_ethnicity_cleanBlack Or African American 0.001013 ** 
## race_ethnicity_cleanHispanic or Latino        0.819384    
## race_ethnicity_cleanOther/Unknown/Declined    0.912991    
## clean_sexMale                                 0.296267    
## clean_sexNon-Binary                           0.924099    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.55 on 147 degrees of freedom
##   (62 observations deleted due to missingness)
## Multiple R-squared:  0.4761, Adjusted R-squared:  0.4262 
## F-statistic:  9.54 on 14 and 147 DF,  p-value: 8.586e-15
hist(resid(all_adjusted))

# format for plotting 
# Unadjusted 
unadj_all_results <- tidy(all_unadj, conf.int = TRUE) %>% 
  mutate(Model = "Unadjusted",
         AdjRsquared = summary(all_unadj)$adj.r.squared)


# Adjusted 
adj_all_results <- tidy(all_adjusted, conf.int = TRUE) %>% 
  mutate(Model = "Adjusted",
         AdjRsquared = summary(all_adjusted)$adj.r.squared)


# combine and plot 
multivar_results <- bind_rows(unadj_all_results, adj_all_results) %>% 
  filter(term != "(Intercept)")  # Remove intercept term
  
adj_rsqu_df <- multivar_results[c("Model", "AdjRsquared")] %>% 
  distinct() # remove duplicates 
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 2 x 2
##   Model      AdjRsquared
##   <chr>            <dbl>
## 1 Unadjusted        0.36
## 2 Adjusted          0.43
write.csv(adj_rsqu_df, file.path(output_dir, "PWSvel_pws_allMetrics_unadjusted_vs_adjusted_adjR.csv"))

p <- adj_vs_unadj_regression_plot(multivar_results, 'PWS: PWS_velocitycmsecmean Multivariate - Unadjusted vs Adjusted', 35)
p

ggsave(file.path(output_dir, "PWSvel_pws_allMetrics_unadjusted_vs_adjusted.png"), 
       bg = "white", width= 10, height=10)

Home Videos –> PWS Velocity Zeno Velocity

Home Univariate models with disease info and demographics

uni1 <- lm(PWS_velocitycmsecmean ~ demoEHR_Age, data = home_df)
hist(resid(uni1)) 

uni2 <- lm(PWS_velocitycmsecmean ~ demoEHR_DiseaseDuration, data = home_df)
hist(resid(uni2)) 

uni3 <- lm(PWS_velocitycmsecmean ~ ms_dx_condensed, data = home_df)
hist(resid(uni3)) 

uni4 <- lm(PWS_velocitycmsecmean ~ race_ethnicity_clean, data = home_df)
hist(resid(uni4)) 

uni5 <- lm(PWS_velocitycmsecmean ~ clean_sex, data = home_df)
hist(resid(uni4)) 

## Extract regression results from each model 
results <- bind_rows(
  tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "demoEHR_Age", 
                                         AdjRsquared = summary(uni1)$adj.r.squared),
  tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "demoEHR_DiseaseDuration", 
                                         AdjRsquared = summary(uni2)$adj.r.squared),
  tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "ms_dx_condensed", 
                                         AdjRsquared = summary(uni3)$adj.r.squared),
  tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "race_ethnicity_clean", 
                                         AdjRsquared = summary(uni4)$adj.r.squared),
  tidy(uni5, conf.int = TRUE) %>% mutate(Variable = "clean_sex", 
                                         AdjRsquared = summary(uni5)$adj.r.squared)) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

adj_rsqu_df <- results[c("Variable","AdjRsquared")] %>% 
  distinct() # remove duplicates 
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 5 x 2
##   Variable                AdjRsquared
##   <chr>                         <dbl>
## 1 demoEHR_Age                    0.03
## 2 demoEHR_DiseaseDuration        0.02
## 3 ms_dx_condensed                0.27
## 4 race_ethnicity_clean           0.16
## 5 clean_sex                      0.06
write.csv(adj_rsqu_df, file.path(output_dir, "PWSvel_home_demographics_univariate_regression_adjR.csv"))

# Apply rounding function to p-value columns 
results <- results %>% mutate(pval_text = paste0("p=", sapply(p.value, format_p_value)))  # Format p-values

# Apply formatting function and create significance indicators
results <- results %>%
  mutate(
    pval_text = paste0("p=", sapply(p.value, format_p_value)),
    sig = ifelse(p.value < 0.05, "Significant", "Non-Significant"),  # Create significance group
    alpha_level = ifelse(p.value < 0.05, 1, 0.3)  # More transparency for non-sig points
  )

# Determine x position for p-values (offset to the right of the max estimate)
x_max <- max(results$conf.high, na.rm = TRUE)  # Get max confidence interval upper bound
results <- results %>% mutate(pval_x_pos = x_max + 1)  # Place p-values slightly to the right of max x range

# Plot 
title <- "Home: PWS_velocitycmsecmean ~ Demographic/MS Info"
p <- univariate_regression_plot(results, title, 35)
p

ggsave(file.path(output_dir, "PWSvel_home_demographics_univariate_regression_estimates.png"), 
       bg = "white", width= 10, height=10)

Metric univariate models - Home video metrics

### Univariate video metrics 
uni1 <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv, data = home_df)
ggplot(data = home_df, aes(x = log_delta_pix_h_rel_median_pose_hv, y = PWS_velocitycmsecmean)) + geom_point()
## Warning: Removed 4 rows containing missing values (`geom_point()`).

hist(resid(uni1))

uni2 <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_hv, data = home_df)
ggplot(data = home_df, aes(x = stride_time_median_sec_pose_hv, y = PWS_velocitycmsecmean)) + geom_point()
## Warning: Removed 13 rows containing missing values (`geom_point()`).

hist(resid(uni2))

uni3 <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_hv, data = home_df)
ggplot(data = home_df, aes(x = mean_cadence_step_per_min_pose_hv, y = PWS_velocitycmsecmean)) + geom_point()
## Warning: Removed 12 rows containing missing values (`geom_point()`).

hist(resid(uni3))

uni4 <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_hv, data = home_df)
ggplot(data = home_df, aes(x = stride_width_median_cm_pose_hv, y = PWS_velocitycmsecmean)) + geom_point()
## Warning: Removed 12 rows containing missing values (`geom_point()`).

hist(resid(uni4))

uni_results <- bind_rows(
  tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv",
                                         AdjRsquared = summary(uni1)$adj.r.squared),
  tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv",
                                         AdjRsquared = summary(uni2)$adj.r.squared),
  tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv",
                                         AdjRsquared = summary(uni3)$adj.r.squared),
  tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv",
                                         AdjRsquared = summary(uni4)$adj.r.squared)) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

uni_results <- uni_results %>% mutate(Model = "Unadjusted")

adj_rsqu_df <- uni_results[c("Variable", "AdjRsquared")] %>% 
  distinct() # remove duplicates 
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 4 x 2
##   Variable                           AdjRsquared
##   <chr>                                    <dbl>
## 1 log_delta_pix_h_rel_median_pose_hv        0.23
## 2 stride_time_median_sec_pose_hv            0.3 
## 3 mean_cadence_step_per_min_pose_hv         0.04
## 4 stride_width_median_cm_pose_hv            0.18
write.csv(adj_rsqu_df, file.path(output_dir, "PWSvel_home_metrics_univariate_regression_adjR.csv"))

p <- univariate_regression_plot(uni_results, 'PWS_velocitycmsecmean ~ Metric', 35)
p

ggsave(file.path(output_dir, "PWSvel_home_metrics_univariate_regression_estimates.png"), 
       bg = "white", width= 10, height=10)

Single Metric, adjust 1 confounder - PWS video metrics

## age ---------------------------------------------------
con1_age <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv + demoEHR_Age, data = home_df)
hist(resid(con1_age))

con2_age <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_hv + demoEHR_Age, data = home_df)
hist(resid(con2_age))

con3_age <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_hv + demoEHR_Age, data = home_df)
hist(resid(con3_age))

con4_age <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_hv + demoEHR_Age, data = home_df)
hist(resid(con4_age))

con_age_results <- bind_rows(
  tidy(con1_age, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
  tidy(con2_age, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
  tidy(con3_age, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
  tidy(con4_age, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("demoEHR_Age")  # Define confounders to remove

con_age_results <- con_age_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_age_results
## # A tibble: 4 x 9
##   term    estimate std.error statistic p.value conf.low conf.high Variable Model
##   <chr>      <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_de~   25.2       5.73       4.40 4.71e-5  13.7       36.7   log_del~ Adju~
## 2 stride~  -63.7      13.2       -4.81 1.46e-5 -90.3      -37.1   stride_~ Adju~
## 3 mean_c~    0.357     0.209      1.70 9.46e-2  -0.0638     0.777 mean_ca~ Adju~
## 4 stride~   -3.88      1.34      -2.90 5.53e-3  -6.57      -1.19  stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_age <- bind_rows(uni_results, con_age_results)
results_age
## # A tibble: 8 x 10
##   term          estimate std.error statistic p.value conf.low conf.high Variable
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_pi~   25.3       5.89       4.30 6.51e-5  13.5       37.1   log_del~
## 2 stride_time_~  -65.2      13.8       -4.74 1.84e-5 -92.9      -37.6   stride_~
## 3 mean_cadence~    0.380     0.215      1.77 8.32e-2  -0.0517     0.812 mean_ca~
## 4 stride_width~   -4.37      1.25      -3.50 9.74e-4  -6.88      -1.87  stride_~
## 5 log_delta_pi~   25.2       5.73       4.40 4.71e-5  13.7       36.7   log_del~
## 6 stride_time_~  -63.7      13.2       -4.81 1.46e-5 -90.3      -37.1   stride_~
## 7 mean_cadence~    0.357     0.209      1.70 9.46e-2  -0.0638     0.777 mean_ca~
## 8 stride_width~   -3.88      1.34      -2.90 5.53e-3  -6.57      -1.19  stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_age, 'Home: PWS_velocitycmsecmean ~ Metric + demoEHR_Age', 35)
p

ggsave(file.path(output_dir, "PWSvel_home_metric_regression_adj_age.png"), 
       bg = "white", width= 10, height=10)
## disease duration -----------------------------------------------------
con1_dur <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv + demoEHR_DiseaseDuration, data = home_df)
hist(resid(con1_dur))

con2_dur <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_hv + demoEHR_DiseaseDuration, data = home_df)
hist(resid(con2_dur))

con3_dur <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_hv + demoEHR_DiseaseDuration, data = home_df)
hist(resid(con3_dur))

con4_dur <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_hv + demoEHR_DiseaseDuration, data = home_df)
hist(resid(con4_dur))

con_dur_results <- bind_rows(
  tidy(con1_dur, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
  tidy(con2_dur, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
  tidy(con3_dur, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
  tidy(con4_dur, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("demoEHR_DiseaseDuration")  # Define confounders to remove

con_dur_results
## # A tibble: 8 x 8
##   term          estimate std.error statistic p.value conf.low conf.high Variable
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_pi~   25.3       5.86      4.32  6.18e-5  13.6       37.1   log_del~
## 2 demoEHR_Dise~   -0.483     0.389    -1.24  2.19e-1  -1.26       0.295 log_del~
## 3 stride_time_~  -69.7      13.9      -5.01  7.56e-6 -97.7      -41.7   stride_~
## 4 demoEHR_Dise~   -0.632     0.419    -1.51  1.37e-1  -1.47       0.209 stride_~
## 5 mean_cadence~    0.400     0.218     1.84  7.18e-2  -0.0368     0.837 mean_ca~
## 6 demoEHR_Dise~   -0.368     0.486    -0.756 4.53e-1  -1.34       0.609 mean_ca~
## 7 stride_width~   -4.44      1.30     -3.43  1.23e-3  -7.05      -1.84  stride_~
## 8 demoEHR_Dise~    0.106     0.461     0.230 8.19e-1  -0.820      1.03  stride_~
con_dur_results <- con_dur_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_dur_results
## # A tibble: 4 x 9
##   term    estimate std.error statistic p.value conf.low conf.high Variable Model
##   <chr>      <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_de~   25.3       5.86       4.32 6.18e-5  13.6       37.1   log_del~ Adju~
## 2 stride~  -69.7      13.9       -5.01 7.56e-6 -97.7      -41.7   stride_~ Adju~
## 3 mean_c~    0.400     0.218      1.84 7.18e-2  -0.0368     0.837 mean_ca~ Adju~
## 4 stride~   -4.44      1.30      -3.43 1.23e-3  -7.05      -1.84  stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_dur <- bind_rows(uni_results, con_dur_results)
results_dur
## # A tibble: 8 x 10
##   term          estimate std.error statistic p.value conf.low conf.high Variable
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_pi~   25.3       5.89       4.30 6.51e-5  13.5       37.1   log_del~
## 2 stride_time_~  -65.2      13.8       -4.74 1.84e-5 -92.9      -37.6   stride_~
## 3 mean_cadence~    0.380     0.215      1.77 8.32e-2  -0.0517     0.812 mean_ca~
## 4 stride_width~   -4.37      1.25      -3.50 9.74e-4  -6.88      -1.87  stride_~
## 5 log_delta_pi~   25.3       5.86       4.32 6.18e-5  13.6       37.1   log_del~
## 6 stride_time_~  -69.7      13.9       -5.01 7.56e-6 -97.7      -41.7   stride_~
## 7 mean_cadence~    0.400     0.218      1.84 7.18e-2  -0.0368     0.837 mean_ca~
## 8 stride_width~   -4.44      1.30      -3.43 1.23e-3  -7.05      -1.84  stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dur, 'Home: PWS_velocitycmsecmean ~ Metric + demoEHR_DiseaseDuration', 35)
p

ggsave(file.path(output_dir, "PWSvel_home_metric_regression_adj_duration.png"), 
       bg = "white", width= 10, height=10)
## MS DX ---------------------------------------------------------------
con1_dx <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv + ms_dx_condensed, data = home_df)
hist(resid(con1_dx))

con2_dx <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_hv + ms_dx_condensed, data = home_df)
hist(resid(con2_dx))

con3_dx <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_hv + ms_dx_condensed, data = home_df)
hist(resid(con3_dx))

con4_dx <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_hv + ms_dx_condensed, data = home_df)
hist(resid(con4_dx))

con_dx_results <- bind_rows(
  tidy(con1_dx, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
  tidy(con2_dx, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
  tidy(con3_dx, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
  tidy(con4_dx, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("ms_dx_condensed")  # Define confounders to remove

con_dx_results
## # A tibble: 12 x 8
##    term         estimate std.error statistic p.value conf.low conf.high Variable
##    <chr>           <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>   
##  1 log_delta_p~   18.0       5.34      3.37  1.36e-3    7.28     28.7   log_del~
##  2 ms_dx_conde~  -37.6       7.98     -4.72  1.58e-5  -53.6     -21.7   log_del~
##  3 ms_dx_conde~    4.84     11.2       0.434 6.66e-1  -17.5      27.2   log_del~
##  4 stride_time~  -17.3      17.9      -0.968 3.38e-1  -53.3      18.7   stride_~
##  5 ms_dx_conde~  -44.3      12.2      -3.64  6.70e-4  -68.8     -19.8   stride_~
##  6 ms_dx_conde~   11.3      11.3       1.00  3.21e-1  -11.4      34.1   stride_~
##  7 mean_cadenc~   -0.139     0.185    -0.752 4.55e-1   -0.512     0.233 mean_ca~
##  8 ms_dx_conde~  -56.9       9.43     -6.03  2.10e-7  -75.8     -37.9   mean_ca~
##  9 ms_dx_conde~    8.83     11.3       0.779 4.39e-1  -13.9      31.6   mean_ca~
## 10 stride_widt~   -1.85      1.21     -1.53  1.32e-1   -4.28      0.576 stride_~
## 11 ms_dx_conde~  -45.8       9.62     -4.76  1.74e-5  -65.1     -26.5   stride_~
## 12 ms_dx_conde~   14.6      11.6       1.26  2.12e-1   -8.65     37.9   stride_~
con_dx_results <- con_dx_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_dx_results
## # A tibble: 4 x 9
##   term    estimate std.error statistic p.value conf.low conf.high Variable Model
##   <chr>      <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_de~   18.0       5.34      3.37  0.00136    7.28     28.7   log_del~ Adju~
## 2 stride~  -17.3      17.9      -0.968 0.338    -53.3      18.7   stride_~ Adju~
## 3 mean_c~   -0.139     0.185    -0.752 0.455     -0.512     0.233 mean_ca~ Adju~
## 4 stride~   -1.85      1.21     -1.53  0.132     -4.28      0.576 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_dx <- bind_rows(uni_results, con_dx_results)
results_dx
## # A tibble: 8 x 10
##   term          estimate std.error statistic p.value conf.low conf.high Variable
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_pi~   25.3       5.89      4.30  6.51e-5  13.5       37.1   log_del~
## 2 stride_time_~  -65.2      13.8      -4.74  1.84e-5 -92.9      -37.6   stride_~
## 3 mean_cadence~    0.380     0.215     1.77  8.32e-2  -0.0517     0.812 mean_ca~
## 4 stride_width~   -4.37      1.25     -3.50  9.74e-4  -6.88      -1.87  stride_~
## 5 log_delta_pi~   18.0       5.34      3.37  1.36e-3   7.28      28.7   log_del~
## 6 stride_time_~  -17.3      17.9      -0.968 3.38e-1 -53.3       18.7   stride_~
## 7 mean_cadence~   -0.139     0.185    -0.752 4.55e-1  -0.512      0.233 mean_ca~
## 8 stride_width~   -1.85      1.21     -1.53  1.32e-1  -4.28       0.576 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dx, 'Home: PWS_velocitycmsecmean ~ Metric + MS DX', 35)
p

ggsave(file.path(output_dir, "PWSvel_home_metric_regression_adj_MSDX.png"), 
       bg = "white", width= 10, height=10)
## race_ethnicity -----------------------------------------------------
con1_re <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv + race_ethnicity_clean, data = home_df)
hist(resid(con1_re))

con2_re <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_hv + race_ethnicity_clean, data = home_df)
hist(resid(con2_re))

con3_re <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_hv + race_ethnicity_clean, data = home_df)
hist(resid(con3_re))

con4_re <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_hv + race_ethnicity_clean, data = home_df)
hist(resid(con4_re))

con_re_results <- bind_rows(
  tidy(con1_re, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
  tidy(con2_re, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
  tidy(con3_re, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
  tidy(con4_re, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("race_ethnicity_clean")  # Define confounders to remove

con_re_results
## # A tibble: 17 x 8
##    term         estimate std.error statistic p.value conf.low conf.high Variable
##    <chr>           <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>   
##  1 log_delta_p~   25.0       5.84      4.28  7.47e-5  13.3       36.7   log_del~
##  2 race_ethnic~   33.5      13.9       2.41  1.93e-2   5.65      61.3   log_del~
##  3 race_ethnic~  -34.2      24.0      -1.43  1.59e-1 -82.3       13.8   log_del~
##  4 race_ethnic~  -26.0      16.8      -1.55  1.27e-1 -59.7        7.61  log_del~
##  5 race_ethnic~    8.23      8.57      0.960 3.41e-1  -8.95      25.4   log_del~
##  6 stride_time~  -62.8      13.4      -4.68  2.46e-5 -89.8      -35.8   stride_~
##  7 race_ethnic~   30.8      16.9       1.82  7.45e-2  -3.17      64.8   stride_~
##  8 race_ethnic~  -23.6      16.9      -1.40  1.70e-1 -57.6       10.4   stride_~
##  9 race_ethnic~   12.0       8.68      1.38  1.75e-1  -5.49      29.4   stride_~
## 10 mean_cadenc~    0.371     0.208     1.78  8.09e-2  -0.0473     0.789 mean_ca~
## 11 race_ethnic~   35.2      19.9       1.77  8.29e-2  -4.76      75.1   mean_ca~
## 12 race_ethnic~  -17.1      19.8      -0.864 3.92e-1 -57.0       22.7   mean_ca~
## 13 race_ethnic~   16.9      10.1       1.68  1.00e-1  -3.38      37.2   mean_ca~
## 14 stride_widt~   -3.84      1.28     -3.01  4.17e-3  -6.40      -1.27  stride_~
## 15 race_ethnic~   30.9      18.9       1.64  1.08e-1  -7.06      68.9   stride_~
## 16 race_ethnic~  -12.0      18.9      -0.639 5.26e-1 -50.0       25.9   stride_~
## 17 race_ethnic~   10.7       9.79      1.09  2.81e-1  -9.00      30.4   stride_~
con_re_results <- con_re_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_re_results
## # A tibble: 4 x 9
##   term    estimate std.error statistic p.value conf.low conf.high Variable Model
##   <chr>      <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_de~   25.0       5.84       4.28 7.47e-5  13.3       36.7   log_del~ Adju~
## 2 stride~  -62.8      13.4       -4.68 2.46e-5 -89.8      -35.8   stride_~ Adju~
## 3 mean_c~    0.371     0.208      1.78 8.09e-2  -0.0473     0.789 mean_ca~ Adju~
## 4 stride~   -3.84      1.28      -3.01 4.17e-3  -6.40      -1.27  stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_re <- bind_rows(uni_results, con_re_results)
results_re
## # A tibble: 8 x 10
##   term          estimate std.error statistic p.value conf.low conf.high Variable
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_pi~   25.3       5.89       4.30 6.51e-5  13.5       37.1   log_del~
## 2 stride_time_~  -65.2      13.8       -4.74 1.84e-5 -92.9      -37.6   stride_~
## 3 mean_cadence~    0.380     0.215      1.77 8.32e-2  -0.0517     0.812 mean_ca~
## 4 stride_width~   -4.37      1.25      -3.50 9.74e-4  -6.88      -1.87  stride_~
## 5 log_delta_pi~   25.0       5.84       4.28 7.47e-5  13.3       36.7   log_del~
## 6 stride_time_~  -62.8      13.4       -4.68 2.46e-5 -89.8      -35.8   stride_~
## 7 mean_cadence~    0.371     0.208      1.78 8.09e-2  -0.0473     0.789 mean_ca~
## 8 stride_width~   -3.84      1.28      -3.01 4.17e-3  -6.40      -1.27  stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_re, 'Home: PWS_velocitycmsecmean ~ Metric + race_ethnicity_clean', 35)
p

ggsave(file.path(output_dir, "PWSvel_home_metric_regression_adj_RaceEthnicity.png"), 
       bg = "white", width= 10, height=10)
## sex -------------------------------------------------------------------
con1_sex <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv + clean_sex, data = home_df)
hist(resid(con1_sex))

con2_sex <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_hv + clean_sex, data = home_df)
hist(resid(con2_sex))

con3_sex <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_hv + clean_sex, data = home_df)
hist(resid(con3_sex))

con4_sex <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_hv + clean_sex, data = home_df)
hist(resid(con4_sex))

con_sex_results <- bind_rows(
  tidy(con1_sex, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
  tidy(con2_sex, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
  tidy(con3_sex, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
  tidy(con4_sex, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("clean_sex")  # Define confounders to remove

con_sex_results
## # A tibble: 9 x 8
##   term          estimate std.error statistic p.value conf.low conf.high Variable
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_pi~   23.1       6.07      3.80  3.53e-4  10.9       35.2   log_del~
## 2 clean_sexMale   11.5       8.47      1.36  1.80e-1  -5.46      28.5   log_del~
## 3 clean_sexNon~   17.4      17.7       0.981 3.31e-1 -18.1       52.9   log_del~
## 4 stride_time_~  -63.6      13.6      -4.67  2.38e-5 -91.0      -36.2   stride_~
## 5 clean_sexMale   13.7       8.78      1.56  1.25e-1  -3.93      31.4   stride_~
## 6 mean_cadence~    0.359     0.211     1.71  9.43e-2  -0.0639     0.782 mean_ca~
## 7 clean_sexMale   17.7       9.75      1.82  7.50e-2  -1.85      37.3   mean_ca~
## 8 stride_width~   -5.05      1.19     -4.25  9.34e-5  -7.43      -2.66  stride_~
## 9 clean_sexMale   25.7       8.74      2.94  4.89e-3   8.18      43.3   stride_~
con_sex_results <- con_sex_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_sex_results
## # A tibble: 4 x 9
##   term    estimate std.error statistic p.value conf.low conf.high Variable Model
##   <chr>      <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_de~   23.1       6.07       3.80 3.53e-4  10.9       35.2   log_del~ Adju~
## 2 stride~  -63.6      13.6       -4.67 2.38e-5 -91.0      -36.2   stride_~ Adju~
## 3 mean_c~    0.359     0.211      1.71 9.43e-2  -0.0639     0.782 mean_ca~ Adju~
## 4 stride~   -5.05      1.19      -4.25 9.34e-5  -7.43      -2.66  stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_sex <- bind_rows(uni_results, con_sex_results)
results_sex
## # A tibble: 8 x 10
##   term          estimate std.error statistic p.value conf.low conf.high Variable
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_pi~   25.3       5.89       4.30 6.51e-5  13.5       37.1   log_del~
## 2 stride_time_~  -65.2      13.8       -4.74 1.84e-5 -92.9      -37.6   stride_~
## 3 mean_cadence~    0.380     0.215      1.77 8.32e-2  -0.0517     0.812 mean_ca~
## 4 stride_width~   -4.37      1.25      -3.50 9.74e-4  -6.88      -1.87  stride_~
## 5 log_delta_pi~   23.1       6.07       3.80 3.53e-4  10.9       35.2   log_del~
## 6 stride_time_~  -63.6      13.6       -4.67 2.38e-5 -91.0      -36.2   stride_~
## 7 mean_cadence~    0.359     0.211      1.71 9.43e-2  -0.0639     0.782 mean_ca~
## 8 stride_width~   -5.05      1.19      -4.25 9.34e-5  -7.43      -2.66  stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_sex, 'Home: PWS_velocitycmsecmean ~ Metric + Sex', 35)
p

ggsave(file.path(output_dir, "PWSvel_home_metric_regression_adj_sex.png"), 
       bg = "white", width= 10, height=10)

Multivariate - HOme - all

Metrics only - not including double support/stance measures, too many missing. May include after improving code

# HOme 

# Unadjusted 
all_unadj <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv + 
                  stride_time_median_sec_pose_hv + 
                  mean_cadence_step_per_min_pose_hv + 
                  stride_width_median_cm_pose_hv, 
                data = home_df)

summary(all_unadj)
## 
## Call:
## lm(formula = PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv + 
##     stride_time_median_sec_pose_hv + mean_cadence_step_per_min_pose_hv + 
##     stride_width_median_cm_pose_hv, data = home_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -44.428 -15.413  -0.333  15.053  39.112 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        217.26491   40.39966   5.378 2.45e-06 ***
## log_delta_pix_h_rel_median_pose_hv  29.67787   10.51239   2.823   0.0070 ** 
## stride_time_median_sec_pose_hv     -34.36527   19.62163  -1.751   0.0865 .  
## mean_cadence_step_per_min_pose_hv   -0.07187    0.22571  -0.318   0.7516    
## stride_width_median_cm_pose_hv      -2.18518    1.16357  -1.878   0.0667 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.97 on 46 degrees of freedom
##   (14 observations deleted due to missingness)
## Multiple R-squared:  0.509,  Adjusted R-squared:  0.4663 
## F-statistic: 11.92 on 4 and 46 DF,  p-value: 9.964e-07
hist(resid(all_unadj))

# Adjusted 
all_adjusted <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv + 
                     stride_time_median_sec_pose_hv + 
                     mean_cadence_step_per_min_pose_hv + 
                     stride_width_median_cm_pose_hv + 
                     demoEHR_Age + 
                     demoEHR_DiseaseDuration +
                     ms_dx_condensed + 
                     race_ethnicity_clean + 
                     clean_sex, 
                   data = home_df)

summary(all_adjusted)
## 
## Call:
## lm(formula = PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv + 
##     stride_time_median_sec_pose_hv + mean_cadence_step_per_min_pose_hv + 
##     stride_width_median_cm_pose_hv + demoEHR_Age + demoEHR_DiseaseDuration + 
##     ms_dx_condensed + race_ethnicity_clean + clean_sex, data = home_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -37.949 -12.878   1.424  12.322  24.056 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                159.322107  40.797510   3.905
## log_delta_pix_h_rel_median_pose_hv          11.864874  10.460199   1.134
## stride_time_median_sec_pose_hv              -5.506569  20.078003  -0.274
## mean_cadence_step_per_min_pose_hv           -0.184827   0.192318  -0.961
## stride_width_median_cm_pose_hv              -2.182158   1.216133  -1.794
## demoEHR_Age                                  0.278120   0.281678   0.987
## demoEHR_DiseaseDuration                     -0.005673   0.427987  -0.013
## ms_dx_condensedProgressive MS              -42.520371  12.486508  -3.405
## ms_dx_condensedMS, Subtype Not Specified     0.677488  11.555976   0.059
## race_ethnicity_cleanAsian                   35.238467  13.657605   2.580
## race_ethnicity_cleanHispanic or Latino     -23.922461  14.584544  -1.640
## race_ethnicity_cleanOther/Unknown/Declined  11.062739   8.316162   1.330
## clean_sexMale                               25.611581   8.870735   2.887
##                                            Pr(>|t|)    
## (Intercept)                                0.000374 ***
## log_delta_pix_h_rel_median_pose_hv         0.263779    
## stride_time_median_sec_pose_hv             0.785371    
## mean_cadence_step_per_min_pose_hv          0.342603    
## stride_width_median_cm_pose_hv             0.080718 .  
## demoEHR_Age                                0.329707    
## demoEHR_DiseaseDuration                    0.989493    
## ms_dx_condensedProgressive MS              0.001573 ** 
## ms_dx_condensedMS, Subtype Not Specified   0.953557    
## race_ethnicity_cleanAsian                  0.013866 *  
## race_ethnicity_cleanHispanic or Latino     0.109204    
## race_ethnicity_cleanOther/Unknown/Declined 0.191358    
## clean_sexMale                              0.006380 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.31 on 38 degrees of freedom
##   (14 observations deleted due to missingness)
## Multiple R-squared:  0.7237, Adjusted R-squared:  0.6365 
## F-statistic: 8.295 on 12 and 38 DF,  p-value: 2.263e-07
hist(resid(all_adjusted))

# format for plotting 
# Unadjusted 
unadj_all_results <- tidy(all_unadj, conf.int = TRUE) %>% 
  mutate(Model = "Unadjusted",
         AdjRsquared = summary(all_unadj)$adj.r.squared)
unadj_all_results
## # A tibble: 5 x 9
##   term             estimate std.error statistic p.value conf.low conf.high Model
##   <chr>               <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>
## 1 (Intercept)      217.        40.4       5.38  2.45e-6  136.      299.    Unad~
## 2 log_delta_pix_h~  29.7       10.5       2.82  7.00e-3    8.52     50.8   Unad~
## 3 stride_time_med~ -34.4       19.6      -1.75  8.65e-2  -73.9       5.13  Unad~
## 4 mean_cadence_st~  -0.0719     0.226    -0.318 7.52e-1   -0.526     0.382 Unad~
## 5 stride_width_me~  -2.19       1.16     -1.88  6.67e-2   -4.53      0.157 Unad~
## # i 1 more variable: AdjRsquared <dbl>
# Adjusted 
adj_all_results <- tidy(all_adjusted, conf.int = TRUE) %>% 
  mutate(Model = "Adjusted",
         AdjRsquared = summary(all_adjusted)$adj.r.squared)

adj_all_results
## # A tibble: 13 x 9
##    term            estimate std.error statistic p.value conf.low conf.high Model
##    <chr>              <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>
##  1 (Intercept)      1.59e+2    40.8      3.91   3.74e-4   76.7     242.    Adju~
##  2 log_delta_pix_~  1.19e+1    10.5      1.13   2.64e-1   -9.31     33.0   Adju~
##  3 stride_time_me~ -5.51e+0    20.1     -0.274  7.85e-1  -46.2      35.1   Adju~
##  4 mean_cadence_s~ -1.85e-1     0.192   -0.961  3.43e-1   -0.574     0.204 Adju~
##  5 stride_width_m~ -2.18e+0     1.22    -1.79   8.07e-2   -4.64      0.280 Adju~
##  6 demoEHR_Age      2.78e-1     0.282    0.987  3.30e-1   -0.292     0.848 Adju~
##  7 demoEHR_Diseas~ -5.67e-3     0.428   -0.0133 9.89e-1   -0.872     0.861 Adju~
##  8 ms_dx_condense~ -4.25e+1    12.5     -3.41   1.57e-3  -67.8     -17.2   Adju~
##  9 ms_dx_condense~  6.77e-1    11.6      0.0586 9.54e-1  -22.7      24.1   Adju~
## 10 race_ethnicity~  3.52e+1    13.7      2.58   1.39e-2    7.59     62.9   Adju~
## 11 race_ethnicity~ -2.39e+1    14.6     -1.64   1.09e-1  -53.4       5.60  Adju~
## 12 race_ethnicity~  1.11e+1     8.32     1.33   1.91e-1   -5.77     27.9   Adju~
## 13 clean_sexMale    2.56e+1     8.87     2.89   6.38e-3    7.65     43.6   Adju~
## # i 1 more variable: AdjRsquared <dbl>
# combine and plot 
multivar_results <- bind_rows(unadj_all_results, adj_all_results) %>% 
  filter(term != "(Intercept)")  # remove intercept term 
  
adj_rsqu_df <- multivar_results[c("Model", "AdjRsquared")] %>% 
  distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 2 x 2
##   Model      AdjRsquared
##   <chr>            <dbl>
## 1 Unadjusted        0.47
## 2 Adjusted          0.64
write.csv(adj_rsqu_df, file.path(output_dir, "PWSvel_home_allMetrics_unadjusted_vs_adjusted_adjR.csv"))

p <- adj_vs_unadj_regression_plot(multivar_results, 'Home: PWS_velocitycmsecmean Multivariate - Unadjusted vs Adjusted', 35)
p

ggsave(file.path(output_dir, "PWSvel_home_allMetrics_unadjusted_vs_adjusted.png"), 
       bg = "white", width= 10, height=10)

FW Velocity Zeno Velocity Linear Regression

FW Zeno Videos –> FW Velocity Zeno Velocity

Demographics univariate models

# FW ------------------------------------------
uni1 <- lm(FW_velocitycmsecmean ~ demoEHR_Age, data = zeno_fw_df)
hist(resid(uni1)) 

uni2 <- lm(FW_velocitycmsecmean ~ demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(uni2)) 

uni3 <- lm(FW_velocitycmsecmean ~ ms_dx_condensed, data = zeno_fw_df)
hist(resid(uni3)) 

uni4 <- lm(FW_velocitycmsecmean ~ race_ethnicity_clean, data = zeno_fw_df)
hist(resid(uni4)) 

uni5 <- lm(FW_velocitycmsecmean ~ clean_sex, data = zeno_fw_df)
hist(resid(uni4)) 

## Extract regression results from each model 
results <- bind_rows(
  tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "demoEHR_Age", 
                                         AdjRsquared = summary(uni1)$adj.r.squared),
  tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "demoEHR_DiseaseDuration", 
                                         AdjRsquared = summary(uni2)$adj.r.squared),
  tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "ms_dx_condensed", 
                                         AdjRsquared = summary(uni3)$adj.r.squared),
  tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "race_ethnicity_clean", 
                                         AdjRsquared = summary(uni4)$adj.r.squared),
  tidy(uni5, conf.int = TRUE) %>% mutate(Variable = "clean_sex", 
                                         AdjRsquared = summary(uni5)$adj.r.squared)) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

adj_rsqu_df <- results[c("Variable","AdjRsquared")] %>% 
  distinct() # remove duplicates 
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 5 x 2
##   Variable                AdjRsquared
##   <chr>                         <dbl>
## 1 demoEHR_Age                    0.13
## 2 demoEHR_DiseaseDuration        0.01
## 3 ms_dx_condensed                0.23
## 4 race_ethnicity_clean           0.05
## 5 clean_sex                      0
write.csv(adj_rsqu_df, file.path(output_dir, "FWvel_fw_demographics_univariate_regression_adjR.csv"))

# Plot 
title <- "FW: FW_velocitycmsecmean ~ Demographic/MS Info"
p <- univariate_regression_plot(results, title, 35)
p

ggsave(file.path(output_dir, "FWvel_fw_demographics_univariate_regression_estimates.png"), 
       bg = "white", width= 10, height=10)

Metric univariate models - FW video metrics

### Univariate video metrics 
uni1 <- lm(FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv, data = zeno_fw_df)
ggplot(data = zeno_fw_df, aes(x = log_delta_pix_h_rel_median_pose_zv, y = FW_velocitycmsecmean)) + geom_point()
## Warning: Removed 4 rows containing missing values (`geom_point()`).

hist(resid(uni1))

uni2 <- lm(FW_velocitycmsecmean ~ stride_time_median_sec_pose_zv, data = zeno_fw_df)
ggplot(data = zeno_fw_df, aes(x = stride_time_median_sec_pose_zv, y = FW_velocitycmsecmean)) + geom_point()
## Warning: Removed 53 rows containing missing values (`geom_point()`).

hist(resid(uni2))

uni3 <- lm(FW_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv, data = zeno_fw_df)
ggplot(data = zeno_fw_df, aes(x = mean_cadence_step_per_min_pose_zv, y = FW_velocitycmsecmean)) + geom_point()
## Warning: Removed 46 rows containing missing values (`geom_point()`).

hist(resid(uni3))

uni4 <- lm(FW_velocitycmsecmean ~ stride_width_median_cm_pose_zv, data = zeno_fw_df)
ggplot(data = zeno_fw_df, aes(x = stride_width_median_cm_pose_zv, y = FW_velocitycmsecmean)) + geom_point()
## Warning: Removed 47 rows containing missing values (`geom_point()`).

hist(resid(uni4))

uni_results <- bind_rows(
  tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv",
                                         AdjRsquared = summary(uni1)$adj.r.squared),
  tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv",
                                         AdjRsquared = summary(uni2)$adj.r.squared),
  tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv",
                                         AdjRsquared = summary(uni3)$adj.r.squared),
  tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv",
                                         AdjRsquared = summary(uni4)$adj.r.squared)) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

uni_results <- uni_results %>% mutate(Model = "Unadjusted")

adj_rsqu_df <- uni_results[c("Variable", "AdjRsquared")] %>% 
  distinct() # remove duplicates 
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 4 x 2
##   Variable                           AdjRsquared
##   <chr>                                    <dbl>
## 1 log_delta_pix_h_rel_median_pose_zv        0.46
## 2 stride_time_median_sec_pose_zv            0.41
## 3 mean_cadence_step_per_min_pose_zv         0.34
## 4 stride_width_median_cm_pose_zv            0.09
write.csv(adj_rsqu_df, file.path(output_dir, "FWvel_fw_metrics_univariate_regression_adjR.csv"))

p <- univariate_regression_plot(uni_results, 'FW: FW_velocitycmsecmean ~ Metric', 35)
p

ggsave(file.path(output_dir, "FWvel_fw_metrics_univariate_regression_estimates.png"), 
       bg = "white", width= 10, height=10)

Single Metric, adjust 1 confounder - FW video metrics

## age ---------------------------------------------------
con1_age <- lm(FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + demoEHR_Age, data = zeno_fw_df)
hist(resid(con1_age))

con2_age <- lm(FW_velocitycmsecmean ~ stride_time_median_sec_pose_zv + demoEHR_Age, data = zeno_fw_df)
hist(resid(con2_age))

con3_age <- lm(FW_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + demoEHR_Age, data = zeno_fw_df)
hist(resid(con3_age))

con4_age <- lm(FW_velocitycmsecmean ~ stride_width_median_cm_pose_zv + demoEHR_Age, data = zeno_fw_df)
hist(resid(con4_age))

con_age_results <- bind_rows(
  tidy(con1_age, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_age, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_age, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_age, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("demoEHR_Age")  # Define confounders to remove

con_age_results <- con_age_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_age_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~    51.1      3.95      12.9  1.26e-28   43.3       58.9  log_del~ Adju~
## 2 strid~  -161.      14.3      -11.3  2.70e-22 -189.      -133.   stride_~ Adju~
## 3 mean_~     1.15     0.117      9.81 2.46e-18    0.916      1.38 mean_ca~ Adju~
## 4 strid~    -4.58     1.08      -4.25 3.55e- 5   -6.72      -2.45 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_age <- bind_rows(uni_results, con_age_results)
results_age
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~    55.5      4.08      13.6  6.90e-31   47.5       63.5  log_del~
## 2 stride_time~  -165.      15.3      -10.8  5.69e-21 -195.      -135.   stride_~
## 3 mean_cadenc~     1.19     0.125      9.54 1.29e-17    0.944      1.44 mean_ca~
## 4 stride_widt~    -4.81     1.14      -4.21 4.11e- 5   -7.07      -2.56 stride_~
## 5 log_delta_p~    51.1      3.95      12.9  1.26e-28   43.3       58.9  log_del~
## 6 stride_time~  -161.      14.3      -11.3  2.70e-22 -189.      -133.   stride_~
## 7 mean_cadenc~     1.15     0.117      9.81 2.46e-18    0.916      1.38 mean_ca~
## 8 stride_widt~    -4.58     1.08      -4.25 3.55e- 5   -6.72      -2.45 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_age, 'FW: FW_velocitycmsecmean ~ Metric + demoEHR_Age', 35)
p

ggsave(file.path(output_dir, "FWvel_fw_metric_regression_adj_age.png"), 
       bg = "white", width= 10, height=10)
## disease duration -----------------------------------------------------
con1_dur <- lm(FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(con1_dur))

con2_dur <- lm(FW_velocitycmsecmean ~ stride_time_median_sec_pose_zv + demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(con2_dur))

con3_dur <- lm(FW_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(con3_dur))

con4_dur <- lm(FW_velocitycmsecmean ~ stride_width_median_cm_pose_zv + demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(con4_dur))

con_dur_results <- bind_rows(
  tidy(con1_dur, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_dur, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_dur, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_dur, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("demoEHR_DiseaseDuration")  # Define confounders to remove

con_dur_results
## # A tibble: 8 x 8
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~   55.0       4.08      13.5  2.13e-30   47.0      63.0   log_del~
## 2 demoEHR_Dis~   -0.384     0.278     -1.38 1.68e- 1   -0.932     0.164 log_del~
## 3 stride_time~ -167.       14.9      -11.2  3.84e-22 -196.     -138.    stride_~
## 4 demoEHR_Dis~   -0.972     0.306     -3.17 1.79e- 3   -1.58     -0.367 stride_~
## 5 mean_cadenc~    1.21      0.123      9.78 2.99e-18    0.963     1.45  mean_ca~
## 6 demoEHR_Dis~   -0.802     0.339     -2.36 1.92e- 2   -1.47     -0.133 mean_ca~
## 7 stride_widt~   -4.66      1.15      -4.05 7.69e- 5   -6.93     -2.39  stride_~
## 8 demoEHR_Dis~   -0.471     0.405     -1.17 2.45e- 1   -1.27      0.327 stride_~
con_dur_results <- con_dur_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_dur_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~    55.0      4.08      13.5  2.13e-30   47.0       63.0  log_del~ Adju~
## 2 strid~  -167.      14.9      -11.2  3.84e-22 -196.      -138.   stride_~ Adju~
## 3 mean_~     1.21     0.123      9.78 2.99e-18    0.963      1.45 mean_ca~ Adju~
## 4 strid~    -4.66     1.15      -4.05 7.69e- 5   -6.93      -2.39 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_dur <- bind_rows(uni_results, con_dur_results)
results_dur
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~    55.5      4.08      13.6  6.90e-31   47.5       63.5  log_del~
## 2 stride_time~  -165.      15.3      -10.8  5.69e-21 -195.      -135.   stride_~
## 3 mean_cadenc~     1.19     0.125      9.54 1.29e-17    0.944      1.44 mean_ca~
## 4 stride_widt~    -4.81     1.14      -4.21 4.11e- 5   -7.07      -2.56 stride_~
## 5 log_delta_p~    55.0      4.08      13.5  2.13e-30   47.0       63.0  log_del~
## 6 stride_time~  -167.      14.9      -11.2  3.84e-22 -196.      -138.   stride_~
## 7 mean_cadenc~     1.21     0.123      9.78 2.99e-18    0.963      1.45 mean_ca~
## 8 stride_widt~    -4.66     1.15      -4.05 7.69e- 5   -6.93      -2.39 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dur, 'FW: FW_velocitycmsecmean ~ Metric + demoEHR_DiseaseDuration', 35)
p

ggsave(file.path(output_dir, "FWvel_fw_metric_regression_adj_duration.png"), 
       bg = "white", width= 10, height=10)
## MS DX ---------------------------------------------------------------
con1_dx <- lm(FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + ms_dx_condensed, data = zeno_fw_df)
hist(resid(con1_dx))

con2_dx <- lm(FW_velocitycmsecmean ~ stride_time_median_sec_pose_zv + ms_dx_condensed, data = zeno_fw_df)
hist(resid(con2_dx))

con3_dx <- lm(FW_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + ms_dx_condensed, data = zeno_fw_df)
hist(resid(con3_dx))

con4_dx <- lm(FW_velocitycmsecmean ~ stride_width_median_cm_pose_zv + ms_dx_condensed, data = zeno_fw_df)
hist(resid(con4_dx))

con_dx_results <- bind_rows(
  tidy(con1_dx, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_dx, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_dx, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_dx, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("ms_dx_condensed")  # Define confounders to remove

con_dx_results
## # A tibble: 12 x 8
##    term        estimate std.error statistic  p.value conf.low conf.high Variable
##    <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
##  1 log_delta_~   47.0       4.10     11.5   5.07e-24   39.0       55.1  log_del~
##  2 ms_dx_cond~  -33.0       5.85     -5.64  5.22e- 8  -44.5      -21.5  log_del~
##  3 ms_dx_cond~  -25.8      30.8      -0.835 4.04e- 1  -86.6       35.0  log_del~
##  4 stride_tim~ -146.       16.2      -9.01  5.04e-16 -178.      -114.   stride_~
##  5 ms_dx_cond~  -23.3       7.62     -3.06  2.58e- 3  -38.4       -8.28 stride_~
##  6 ms_dx_cond~  -19.1      23.0      -0.830 4.08e- 1  -64.6       26.4  stride_~
##  7 mean_caden~    0.994     0.125     7.96  2.17e-13    0.748      1.24 mean_ca~
##  8 ms_dx_cond~  -36.4       7.53     -4.83  3.01e- 6  -51.2      -21.5  mean_ca~
##  9 ms_dx_cond~  -23.7      24.5      -0.968 3.35e- 1  -72.1       24.7  mean_ca~
## 10 stride_wid~   -3.81      1.05     -3.63  3.68e- 4   -5.87      -1.74 stride_~
## 11 ms_dx_cond~  -52.5       8.03     -6.53  7.12e-10  -68.3      -36.6  stride_~
## 12 ms_dx_cond~   -5.90     27.6      -0.214 8.31e- 1  -60.3       48.5  stride_~
con_dx_results <- con_dx_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_dx_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~   47.0       4.10      11.5  5.07e-24   39.0       55.1  log_del~ Adju~
## 2 strid~ -146.       16.2       -9.01 5.04e-16 -178.      -114.   stride_~ Adju~
## 3 mean_~    0.994     0.125      7.96 2.17e-13    0.748      1.24 mean_ca~ Adju~
## 4 strid~   -3.81      1.05      -3.63 3.68e- 4   -5.87      -1.74 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_dx <- bind_rows(uni_results, con_dx_results)
results_dx
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~   55.5       4.08      13.6  6.90e-31   47.5       63.5  log_del~
## 2 stride_time~ -165.       15.3      -10.8  5.69e-21 -195.      -135.   stride_~
## 3 mean_cadenc~    1.19      0.125      9.54 1.29e-17    0.944      1.44 mean_ca~
## 4 stride_widt~   -4.81      1.14      -4.21 4.11e- 5   -7.07      -2.56 stride_~
## 5 log_delta_p~   47.0       4.10      11.5  5.07e-24   39.0       55.1  log_del~
## 6 stride_time~ -146.       16.2       -9.01 5.04e-16 -178.      -114.   stride_~
## 7 mean_cadenc~    0.994     0.125      7.96 2.17e-13    0.748      1.24 mean_ca~
## 8 stride_widt~   -3.81      1.05      -3.63 3.68e- 4   -5.87      -1.74 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dx, 'FW: FW_velocitycmsecmean ~ Metric + MS DX', 35)
p

ggsave(file.path(output_dir, "FWvel_fw_metric_regression_adj_MSDX.png"), 
       bg = "white", width= 10, height=10)
## race_ethnicity -----------------------------------------------------
con1_re <- lm(FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + race_ethnicity_clean, data = zeno_fw_df)
hist(resid(con1_re))

con2_re <- lm(FW_velocitycmsecmean ~ stride_time_median_sec_pose_zv + race_ethnicity_clean, data = zeno_fw_df)
hist(resid(con2_re))

con3_re <- lm(FW_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + race_ethnicity_clean, data = zeno_fw_df)
hist(resid(con3_re))

con4_re <- lm(FW_velocitycmsecmean ~ stride_width_median_cm_pose_zv + race_ethnicity_clean, data = zeno_fw_df)
hist(resid(con4_re))

con_re_results <- bind_rows(
  tidy(con1_re, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_re, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_re, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_re, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("race_ethnicity_clean")  # Define confounders to remove

con_re_results
## # A tibble: 20 x 8
##    term        estimate std.error statistic  p.value conf.low conf.high Variable
##    <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
##  1 log_delta_~   53.9       4.04    13.3    7.40e-30   46.0       61.9  log_del~
##  2 race_ethni~   -3.48      8.47    -0.410  6.82e- 1  -20.2       13.2  log_del~
##  3 race_ethni~  -26.4       9.08    -2.91   3.98e- 3  -44.3       -8.53 log_del~
##  4 race_ethni~    6.90      7.86     0.877  3.81e- 1   -8.60      22.4  log_del~
##  5 race_ethni~   11.3       8.04     1.41   1.60e- 1   -4.51      27.2  log_del~
##  6 stride_tim~ -157.       15.1    -10.4    1.16e-19 -186.      -127.   stride_~
##  7 race_ethni~   -2.53      9.17    -0.276  7.83e- 1  -20.6       15.6  stride_~
##  8 race_ethni~  -34.5       9.90    -3.48   6.34e- 4  -54.0      -14.9  stride_~
##  9 race_ethni~   -4.32      8.61    -0.502  6.16e- 1  -21.3       12.7  stride_~
## 10 race_ethni~    5.25      9.79     0.537  5.92e- 1  -14.1       24.6  stride_~
## 11 mean_caden~    1.16      0.122    9.47   2.31e-17    0.916      1.40 mean_ca~
## 12 race_ethni~   -1.17     10.0     -0.116  9.08e- 1  -21.0       18.7  mean_ca~
## 13 race_ethni~  -39.2      10.8     -3.64   3.58e- 4  -60.5      -18.0  mean_ca~
## 14 race_ethni~   -4.50      9.20    -0.489  6.25e- 1  -22.7       13.7  mean_ca~
## 15 race_ethni~    6.91     10.8      0.642  5.21e- 1  -14.3       28.1  mean_ca~
## 16 stride_wid~   -4.11      1.18    -3.49   6.21e- 4   -6.44      -1.78 stride_~
## 17 race_ethni~   -0.977    12.1     -0.0809 9.36e- 1  -24.8       22.9  stride_~
## 18 race_ethni~  -38.3      13.0     -2.94   3.70e- 3  -63.9      -12.6  stride_~
## 19 race_ethni~   -2.16     10.9     -0.198  8.44e- 1  -23.7       19.4  stride_~
## 20 race_ethni~    4.50     12.9      0.349  7.27e- 1  -20.9       29.9  stride_~
con_re_results <- con_re_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_re_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~    53.9      4.04      13.3  7.40e-30   46.0       61.9  log_del~ Adju~
## 2 strid~  -157.      15.1      -10.4  1.16e-19 -186.      -127.   stride_~ Adju~
## 3 mean_~     1.16     0.122      9.47 2.31e-17    0.916      1.40 mean_ca~ Adju~
## 4 strid~    -4.11     1.18      -3.49 6.21e- 4   -6.44      -1.78 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_re <- bind_rows(uni_results, con_re_results)
results_re
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~    55.5      4.08      13.6  6.90e-31   47.5       63.5  log_del~
## 2 stride_time~  -165.      15.3      -10.8  5.69e-21 -195.      -135.   stride_~
## 3 mean_cadenc~     1.19     0.125      9.54 1.29e-17    0.944      1.44 mean_ca~
## 4 stride_widt~    -4.81     1.14      -4.21 4.11e- 5   -7.07      -2.56 stride_~
## 5 log_delta_p~    53.9      4.04      13.3  7.40e-30   46.0       61.9  log_del~
## 6 stride_time~  -157.      15.1      -10.4  1.16e-19 -186.      -127.   stride_~
## 7 mean_cadenc~     1.16     0.122      9.47 2.31e-17    0.916      1.40 mean_ca~
## 8 stride_widt~    -4.11     1.18      -3.49 6.21e- 4   -6.44      -1.78 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_re, 'FW: FW_velocitycmsecmean~ Metric + race_ethnicity_clean', 35)
p

ggsave(file.path(output_dir, "FWvel_fw_metric_regression_adj_RaceEthnicity.png"), 
       bg = "white", width= 10, height=10)
## sex -------------------------------------------------------------------
con1_sex <- lm(FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + clean_sex, data = zeno_fw_df)
hist(resid(con1_sex))

con2_sex <- lm(FW_velocitycmsecmean ~ stride_time_median_sec_pose_zv + clean_sex, data = zeno_fw_df)
hist(resid(con2_sex))

con3_sex <- lm(FW_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + clean_sex, data = zeno_fw_df)
hist(resid(con3_sex))

con4_sex <- lm(FW_velocitycmsecmean ~ stride_width_median_cm_pose_zv + clean_sex, data = zeno_fw_df)
hist(resid(con4_sex))

con_sex_results <- bind_rows(
  tidy(con1_sex, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
  tidy(con2_sex, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
  tidy(con3_sex, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
  tidy(con4_sex, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>% 
  filter(term != "(Intercept)")  # Remove intercept term

confounders <- c("clean_sex")  # Define confounders to remove

con_sex_results
## # A tibble: 12 x 8
##    term        estimate std.error statistic  p.value conf.low conf.high Variable
##    <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
##  1 log_delta_~    55.1      4.09     13.5   2.31e-30   47.1       63.2  log_del~
##  2 clean_sexM~     5.32     5.32      1.00  3.18e- 1   -5.16      15.8  log_del~
##  3 clean_sexN~    14.7     23.4       0.629 5.30e- 1  -31.4       60.9  log_del~
##  4 stride_tim~  -165.      15.2     -10.9   4.08e-21 -195.      -135.   stride_~
##  5 clean_sexM~     8.88     5.84      1.52  1.30e- 1   -2.65      20.4  stride_~
##  6 clean_sexN~    26.1     23.5       1.11  2.69e- 1  -20.3       72.5  stride_~
##  7 mean_caden~     1.19     0.124     9.57  1.13e-17    0.945      1.44 mean_ca~
##  8 clean_sexM~    11.8      6.39      1.85  6.58e- 2   -0.782     24.4  mean_ca~
##  9 clean_sexN~    17.0     25.9       0.656 5.13e- 1  -34.2       68.2  mean_ca~
## 10 stride_wid~    -5.13     1.14     -4.48  1.34e- 5   -7.39      -2.87 stride_~
## 11 clean_sexM~    14.5      7.52      1.93  5.57e- 2   -0.355     29.4  stride_~
## 12 clean_sexN~    35.1     30.2       1.16  2.47e- 1  -24.6       94.8  stride_~
con_sex_results <- con_sex_results %>% 
  filter(!grepl(confounders, term)) %>% # Remove confounders
  mutate(Model = "Adjusted") 

con_sex_results
## # A tibble: 4 x 9
##   term   estimate std.error statistic  p.value conf.low conf.high Variable Model
##   <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    <chr>
## 1 log_d~    55.1      4.09      13.5  2.31e-30   47.1       63.2  log_del~ Adju~
## 2 strid~  -165.      15.2      -10.9  4.08e-21 -195.      -135.   stride_~ Adju~
## 3 mean_~     1.19     0.124      9.57 1.13e-17    0.945      1.44 mean_ca~ Adju~
## 4 strid~    -5.13     1.14      -4.48 1.34e- 5   -7.39      -2.87 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate 
# bind all together to single results df 
results_sex <- bind_rows(uni_results, con_sex_results)
results_sex
## # A tibble: 8 x 10
##   term         estimate std.error statistic  p.value conf.low conf.high Variable
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>   
## 1 log_delta_p~    55.5      4.08      13.6  6.90e-31   47.5       63.5  log_del~
## 2 stride_time~  -165.      15.3      -10.8  5.69e-21 -195.      -135.   stride_~
## 3 mean_cadenc~     1.19     0.125      9.54 1.29e-17    0.944      1.44 mean_ca~
## 4 stride_widt~    -4.81     1.14      -4.21 4.11e- 5   -7.07      -2.56 stride_~
## 5 log_delta_p~    55.1      4.09      13.5  2.31e-30   47.1       63.2  log_del~
## 6 stride_time~  -165.      15.2      -10.9  4.08e-21 -195.      -135.   stride_~
## 7 mean_cadenc~     1.19     0.124      9.57 1.13e-17    0.945      1.44 mean_ca~
## 8 stride_widt~    -5.13     1.14      -4.48 1.34e- 5   -7.39      -2.87 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_sex, 'FW: FW_velocitycmsecmean ~ Metric + Sex', 35)
p

ggsave(file.path(output_dir, "FWvel_fw_metric_regression_adj_sex.png"), 
       bg = "white", width= 10, height=10)

Multivariate - FW

Metrics only - not including double support/stance measures, too many missing. May include after improving code

# FW 

# Unadjusted 
all_unadj <- lm(FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + 
                  stride_time_median_sec_pose_zv + 
                  mean_cadence_step_per_min_pose_zv + 
                  stride_width_median_cm_pose_zv, 
                data = zeno_fw_df)

summary(all_unadj)
## 
## Call:
## lm(formula = FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + 
##     stride_time_median_sec_pose_zv + mean_cadence_step_per_min_pose_zv + 
##     stride_width_median_cm_pose_zv, data = zeno_fw_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -77.926 -16.952  -0.967  17.011  75.803 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        252.9380    36.9169   6.852 1.45e-10 ***
## log_delta_pix_h_rel_median_pose_zv  32.7833     4.9370   6.640 4.50e-10 ***
## stride_time_median_sec_pose_zv     -80.2188    20.0400  -4.003 9.50e-05 ***
## mean_cadence_step_per_min_pose_zv    0.2778     0.1695   1.639   0.1031    
## stride_width_median_cm_pose_zv      -2.1265     0.8317  -2.557   0.0115 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.97 on 162 degrees of freedom
##   (55 observations deleted due to missingness)
## Multiple R-squared:  0.5885, Adjusted R-squared:  0.5784 
## F-statistic: 57.93 on 4 and 162 DF,  p-value: < 2.2e-16
hist(resid(all_unadj))

# Adjusted 
all_adjusted <- lm(FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + 
                     stride_time_median_sec_pose_zv + 
                     mean_cadence_step_per_min_pose_zv + 
                     stride_width_median_cm_pose_zv + 
                     demoEHR_Age + 
                     demoEHR_DiseaseDuration +
                     ms_dx_condensed + 
                     race_ethnicity_clean + 
                     clean_sex, 
                   data = zeno_fw_df)

summary(all_adjusted)
## 
## Call:
## lm(formula = FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + 
##     stride_time_median_sec_pose_zv + mean_cadence_step_per_min_pose_zv + 
##     stride_width_median_cm_pose_zv + demoEHR_Age + demoEHR_DiseaseDuration + 
##     ms_dx_condensed + race_ethnicity_clean + clean_sex, data = zeno_fw_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -83.084 -13.704   0.475  14.709  62.648 
## 
## Coefficients:
##                                                Estimate Std. Error t value
## (Intercept)                                   279.05497   34.97490   7.979
## log_delta_pix_h_rel_median_pose_zv             26.43804    4.59993   5.747
## stride_time_median_sec_pose_zv                -70.43068   18.30922  -3.847
## mean_cadence_step_per_min_pose_zv               0.30823    0.15484   1.991
## stride_width_median_cm_pose_zv                 -1.77231    0.78927  -2.246
## demoEHR_Age                                    -0.94283    0.20187  -4.671
## demoEHR_DiseaseDuration                         0.09378    0.28616   0.328
## ms_dx_condensedProgressive MS                  -9.57995    6.43577  -1.489
## ms_dx_condensedMS, Subtype Not Specified      -21.09913   25.73336  -0.820
## race_ethnicity_cleanAsian                     -10.01438    7.78829  -1.286
## race_ethnicity_cleanBlack Or African American -31.79119    7.89517  -4.027
## race_ethnicity_cleanHispanic or Latino        -10.52545    7.04757  -1.493
## race_ethnicity_cleanOther/Unknown/Declined     -6.10940    8.14599  -0.750
## clean_sexMale                                   8.29111    4.72665   1.754
## clean_sexNon-Binary                             9.88749   18.06623   0.547
##                                               Pr(>|t|)    
## (Intercept)                                   3.33e-13 ***
## log_delta_pix_h_rel_median_pose_zv            4.80e-08 ***
## stride_time_median_sec_pose_zv                0.000176 ***
## mean_cadence_step_per_min_pose_zv             0.048323 *  
## stride_width_median_cm_pose_zv                0.026178 *  
## demoEHR_Age                                   6.56e-06 ***
## demoEHR_DiseaseDuration                       0.743569    
## ms_dx_condensedProgressive MS                 0.138678    
## ms_dx_condensedMS, Subtype Not Specified      0.413549    
## race_ethnicity_cleanAsian                     0.200459    
## race_ethnicity_cleanBlack Or African American 8.90e-05 ***
## race_ethnicity_cleanHispanic or Latino        0.137383    
## race_ethnicity_cleanOther/Unknown/Declined    0.454421    
## clean_sexMale                                 0.081426 .  
## clean_sexNon-Binary                           0.584981    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24.81 on 152 degrees of freedom
##   (55 observations deleted due to missingness)
## Multiple R-squared:  0.6963, Adjusted R-squared:  0.6684 
## F-statistic:  24.9 on 14 and 152 DF,  p-value: < 2.2e-16
hist(resid(all_adjusted))

# format for plotting 
# Unadjusted 
unadj_all_results <- tidy(all_unadj, conf.int = TRUE) %>% 
  mutate(Model = "Unadjusted",
         AdjRsquared = summary(all_unadj)$adj.r.squared)

# Adjusted 
adj_all_results <- tidy(all_adjusted, conf.int = TRUE) %>% 
  mutate(Model = "Adjusted",
         AdjRsquared = summary(all_adjusted)$adj.r.squared)

# combine and plot 
multivar_results <- bind_rows(unadj_all_results, adj_all_results) %>% 
  filter(term != "(Intercept)")  # Remove intercept term
  
adj_rsqu_df <- multivar_results[c("Model", "AdjRsquared")] %>% 
  distinct() # remove duplicates 
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 2 x 2
##   Model      AdjRsquared
##   <chr>            <dbl>
## 1 Unadjusted        0.58
## 2 Adjusted          0.67
write.csv(adj_rsqu_df, file.path(output_dir, "FWvel_fw_metrics_univariate_regression_adjR.csv"))

p <- adj_vs_unadj_regression_plot(multivar_results, 'FW: FW_velocitycmsecmean Multivariate - Unadjusted vs Adjusted', 35)
p

ggsave(file.path(output_dir, "FWvel_fw_allMetrics_unadjusted_vs_adjusted.png"), 
       bg = "white", width= 10, height=10)

EDSS Ordinal Regression

Preferred Walking Spped –> EDSS

Fast Walking Speed –> EDSS

Home Videos –> EDSS